In [None]:
!pip install torch torchvision hypll geoopt

Collecting hypll
  Downloading hypll-0.1.1-py3-none-any.whl (34 kB)
Collecting geoopt
  Downloading geoopt-0.5.0-py3-none-any.whl (90 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.1/90.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==

### Poincaré embedding tests



In [None]:
import random
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
import networkx as nx
from hypll.manifolds.poincare_ball import Curvature, PoincareBall
import hypll.nn as hnn
from hypll.optim import RiemannianSGD
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR100
from torchvision.transforms import ToTensor
from geoopt import ManifoldParameter
#

# Set random seeds for reproducibility.
# Python's `random` drives the negative sampling in the embedding dataset, so it
# has to be seeded alongside torch (initialisation, DataLoader shuffling).
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# Set the device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1. Load the CIFAR-100 dataset and build a hierarchy
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

def build_cifar100_hierarchy():
    """Build the two-level CIFAR-100 class hierarchy as a directed graph.

    Nodes get consecutive integer ids in construction order: each coarse class
    is followed immediately by its fine classes. Every node stores its class
    name under the ``name`` attribute and each coarse node has an edge to each
    of its fine nodes.

    Returns:
        tuple: ``(graph, fine_nodes, coarse_nodes)`` where ``graph`` is an
        ``nx.DiGraph`` and the two lists hold node ids in construction order.
        NOTE(review): that order follows ``coarse_to_fine`` below, which is not
        necessarily the dataset's fine-label index order -- confirm before
        indexing anything derived from ``fine_nodes`` with dataset labels.
    """
    cifar100 = datasets.CIFAR100(root='./data', train=True, download=True)
    coarse_to_fine = {
        'aquatic mammals': ['beaver', 'dolphin', 'otter', 'seal', 'whale'],
        'fish': ['aquarium_fish', 'flatfish', 'ray', 'shark', 'trout'],
        'flowers': ['orchid', 'poppy', 'rose', 'sunflower', 'tulip'],
        'food containers': ['bottle', 'bowl', 'can', 'cup', 'plate'],
        'fruit and vegetables': ['apple', 'mushroom', 'orange', 'pear', 'sweet_pepper'],
        'household electrical devices': ['clock', 'keyboard', 'lamp', 'telephone', 'television'],
        'household furniture': ['bed', 'chair', 'couch', 'table', 'wardrobe'],
        'insects': ['bee', 'beetle', 'butterfly', 'caterpillar', 'cockroach'],
        'large carnivores': ['bear', 'leopard', 'lion', 'tiger', 'wolf'],
        'large man-made outdoor things': ['bridge', 'castle', 'house', 'road', 'skyscraper'],
        'large natural outdoor scenes': ['cloud', 'forest', 'mountain', 'plain', 'sea'],
        'large omnivores and herbivores': ['camel', 'cattle', 'chimpanzee', 'elephant', 'kangaroo'],
        'medium-sized mammals': ['fox', 'porcupine', 'possum', 'raccoon', 'skunk'],
        'non-insect invertebrates': ['crab', 'lobster', 'snail', 'spider', 'worm'],
        'people': ['baby', 'boy', 'girl', 'man', 'woman'],
        'reptiles': ['crocodile', 'dinosaur', 'lizard', 'snake', 'turtle'],
        'small mammals': ['hamster', 'mouse', 'rabbit', 'shrew', 'squirrel'],
        'trees': ['maple_tree', 'oak_tree', 'palm_tree', 'pine_tree', 'willow_tree'],
        'vehicles 1': ['bicycle', 'bus', 'motorcycle', 'pickup_truck', 'train'],
        'vehicles 2': ['lawn_mower', 'rocket', 'streetcar', 'tank', 'tractor'],
    }
    known_fine_names = set(cifar100.classes)
    graph = nx.DiGraph()

    coarse_nodes, fine_nodes = [], []
    next_id = 0
    for parent_name, child_names in coarse_to_fine.items():
        # Remember the parent's own node id: the enumerate() position of the
        # coarse class is NOT its node id once fine nodes are interleaved.
        parent_node = next_id
        graph.add_node(parent_node, name=parent_name)
        coarse_nodes.append(parent_node)
        next_id += 1
        for child_name in child_names:
            if child_name not in known_fine_names:
                print(f"Error: '{child_name}' not found in fine_labels")
                continue
            graph.add_node(next_id, name=child_name)
            graph.add_edge(parent_node, next_id)
            fine_nodes.append(next_id)
            next_id += 1
    return graph, fine_nodes, coarse_nodes

cifar100_graph, l_finer, l_hyper = build_cifar100_hierarchy()

# 2. Create a dataset containing the graph from which we can sample
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

class CIFAR100EmbeddingDataset(Dataset):
    """Edge dataset over a hierarchy graph, with sampled negative edges.

    Each item is one true edge followed by ten corrupted edges: up to five that
    keep the source and swap the target, and the remainder that keep the target
    and swap the source.
    """

    def __init__(self, graph):
        super().__init__()
        self.graph = graph
        self.edges_list = list(graph.edges())

    def __len__(self):
        return len(self.edges_list)

    def __getitem__(self, idx):
        """Return ``(edges, edge_label_targets)`` for the ``idx``-th true edge.

        ``edges`` holds 11 ``[source, target]`` pairs (true edge first) and
        ``edge_label_targets`` is a boolean vector that is True only at index 0.
        """
        rel = self.edges_list[idx]
        source, target = rel[0], rel[1]

        # Corrupted targets: anything that is neither the source nor below it.
        target_pool = list(
            self.graph.nodes() - nx.descendants(self.graph, source) - {source}
        )
        n_target_negatives = min(5, len(target_pool))
        corrupted_targets = random.sample(target_pool, n_target_negatives)

        # Corrupted sources: anything that is neither the target nor above it.
        # Their count tops the negatives up to ten in total.
        source_pool = list(
            self.graph.nodes() - nx.ancestors(self.graph, target) - {target}
        )
        corrupted_sources = random.sample(source_pool, 10 - n_target_negatives)

        pairs = [[source, target]]
        pairs.extend([source, neg] for neg in corrupted_targets)
        pairs.extend([neg, target] for neg in corrupted_sources)
        edges = torch.tensor(pairs)

        edge_label_targets = torch.zeros(11, dtype=torch.bool)
        edge_label_targets[0] = True
        return edges, edge_label_targets

# Wrap the hierarchy graph so that each item is one true edge plus sampled negatives.
dataset = CIFAR100EmbeddingDataset(cifar100_graph)
# Each batch groups 10 true edges (with their negatives); order is reshuffled every epoch.
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)

# 3. Initialize the Poincare ball on which the embeddings will be trained
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# hypll manifold object, built from a Curvature(1.0) argument.
poincare_ball = PoincareBall(Curvature(1.0))

# 4. Define the Poincare embedding model
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

class PoincareEmbedding(hnn.HEmbedding):
    """Embedding table on a manifold whose forward pass returns edge distances."""

    def __init__(self, num_embeddings, embedding_dim, manifold):
        """Create the embedding table and move it to the notebook-level ``device``."""
        super().__init__(num_embeddings, embedding_dim, manifold)
        self.to(device)  # Move the model to the device

    def forward(self, edges):
        """Return the manifold distance between the two endpoints of every edge.

        The indexing below treats dimension 2 of the looked-up embeddings as the
        (source, target) pair axis.
        # assumes `edges` is (batch, edges_per_item, 2) node ids -- TODO confirm
        """
        embeddings = super().forward(edges)
        edge_distances = self.manifold.dist(x=embeddings[:, :, 0, :], y=embeddings[:, :, 1, :])
        return edge_distances

# One embedding row per graph node, 64 dimensions each.
model = PoincareEmbedding(
    num_embeddings=len(cifar100_graph.nodes()),
    embedding_dim=64,
    manifold=poincare_ball,
)

# 5. Define the Poincare embedding loss function
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

def poincare_embeddings_loss(dists, targets):
    """Softmax-style ranking loss over edge distances.

    Computes ``-mean(log(sum_{positive} exp(-d) / sum_{all} exp(-d)))`` along the
    last dimension, evaluated in log-space so that large distances (where
    ``exp(-d)`` underflows to 0) do not produce ``0 / 0 = NaN``.

    Args:
        dists: float tensor of distances; the last dimension enumerates the
            candidate edges of one sample.
        targets: boolean tensor broadcast-compatible with ``dists``; True marks
            the positive edge(s).

    Returns:
        Scalar tensor with the mean loss.
    """
    scores = dists.neg()
    # Masking with -inf removes the negatives from the numerator's log-sum-exp.
    positive_scores = scores.masked_fill(~targets, float("-inf"))
    log_numerator = torch.logsumexp(positive_scores, dim=-1)
    log_denominator = torch.logsumexp(scores, dim=-1)
    return (log_denominator - log_numerator).mean()

# 6. Perform a few "burn-in" training epochs with reduced learning rate
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

def _run_embedding_epochs(model, dataloader, optimizer, num_epochs, log_prefix):
    """Train ``model`` for ``num_epochs`` passes and print the mean loss per epoch.

    Args:
        model: embedding model mapping a batch of edges to distances.
        dataloader: yields ``(edges, edge_label_targets)`` batches.
        optimizer: optimizer already bound to ``model``'s parameters.
        num_epochs: number of passes over ``dataloader``.
        log_prefix: text printed before the epoch number.
    """
    for epoch in range(num_epochs):
        running_loss = 0.0
        for edges, edge_label_targets in dataloader:
            edges = edges.to(device)  # Move the data to the device
            edge_label_targets = edge_label_targets.to(device)  # Move the labels to the device
            optimizer.zero_grad()
            dists = model(edges)
            loss = poincare_embeddings_loss(dists=dists, targets=edge_label_targets)
            loss.backward()
            optimizer.step()
            # .item() detaches the value; summing the tensor itself would keep
            # every batch's autograd graph alive until the epoch ends.
            running_loss += loss.item()
        print(f"{log_prefix} {epoch} loss: {running_loss / len(dataloader)}")


# Burn-in: a few epochs at one tenth of the main learning rate.
optimizer = RiemannianSGD(
    params=model.parameters(),
    lr=0.3 / 10,
)
_run_embedding_epochs(model, dataloader, optimizer, num_epochs=10, log_prefix="Burn-in epoch")

# Main training run at the full learning rate.
optimizer = RiemannianSGD(
    params=model.parameters(),
    lr=0.3,
)
_run_embedding_epochs(model, dataloader, optimizer, num_epochs=300, log_prefix="Epoch")

# You have now trained your own Poincare Embeddings on CIFAR-100!

# Pull the trained embedding table to the CPU and split it into class prototypes.
embeddings = model.weight.data.cpu()
# Rows follow the order of l_hyper / l_finer, i.e. the order in which the
# hierarchy builder created the nodes.
# NOTE(review): that order is not necessarily the dataset's fine-label index
# order -- any consumer that indexes these rows with dataset labels must map
# labels to rows first. Confirm against the training cell.
hyper_embedings = embeddings[l_hyper]
finer_embedings = embeddings[l_finer]

# Load the CIFAR-100 dataset
train_dataset = CIFAR100(root='./data', train=True, download=True, transform=ToTensor())
test_dataset = CIFAR100(root='./data', train=False, download=True, transform=ToTensor())

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:08<00:00, 19398420.42it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Burn-in epoch 0 loss: 2.409217119216919
Burn-in epoch 1 loss: 2.407130241394043
Burn-in epoch 2 loss: 2.4043080806732178
Burn-in epoch 3 loss: 2.405734062194824
Burn-in epoch 4 loss: 2.4066648483276367
Burn-in epoch 5 loss: 2.402486562728882
Burn-in epoch 6 loss: 2.3978419303894043
Burn-in epoch 7 loss: 2.397519826889038
Burn-in epoch 8 loss: 2.391422748565674
Burn-in epoch 9 loss: 2.3890156745910645
Epoch 0 loss: 2.3899879455566406
Epoch 1 loss: 2.3695733547210693
Epoch 2 loss: 2.35863995552063
Epoch 3 loss: 2.3382985591888428
Epoch 4 loss: 2.320953845977783
Epoch 5 loss: 2.3123433589935303
Epoch 6 loss: 2.294006824493408
Epoch 7 loss: 2.287970781326294
Epoch 8 loss: 2.273292303085327
Epoch 9 loss: 2.2735350131988525
Epoch 10 loss: 2.261970281600952
Epoch 11 loss: 2.2709224224090576
Epoch 12 loss: 2.2589433193206787
Epoch 13 loss: 2.2602245807647705
Epoch 14 loss: 2.244173049926758
Epoch 15 loss: 2.2348856925964355
Epoch 16 loss: 2.2

In [None]:
import random
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
import networkx as nx
from hypll.manifolds.poincare_ball import Curvature, PoincareBall
import hypll.nn as hnn
from hypll.optim import RiemannianSGD

# 1. Load the CIFAR-100 dataset and build a hierarchy
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# cifar100_graph = build_cifar100_hierarchy()
import networkx as nx
from torchvision import datasets
def build_cifar100_hierarchy_root():
    """Build the CIFAR-100 hierarchy as a rooted, three-level directed graph.

    Node 120 is the root; the remaining ids are handed out downwards from 119,
    each coarse class followed immediately by its fine classes. Every node
    stores its class name under the ``label`` attribute. Edges run
    root -> coarse -> fine.

    Returns:
        tuple: ``(graph, fine_nodes, coarse_nodes)`` with node ids listed in
        construction order.
        NOTE(review): the hard-coded root id presumably assumes all 20 coarse
        and 100 fine classes are added so that ids cover 0..120 -- confirm.
    """
    cifar100 = datasets.CIFAR100(root='./data', train=True, download=True)
    coarse_to_fine = {
        'aquatic mammals': ['beaver', 'dolphin', 'otter', 'seal', 'whale'],
        'fish': ['aquarium_fish', 'flatfish', 'ray', 'shark', 'trout'],
        'flowers': ['orchid', 'poppy', 'rose', 'sunflower', 'tulip'],
        'food containers': ['bottle', 'bowl', 'can', 'cup', 'plate'],
        'fruit and vegetables': ['apple', 'mushroom', 'orange', 'pear', 'sweet_pepper'],
        'household electrical devices': ['clock', 'keyboard', 'lamp', 'telephone', 'television'],
        'household furniture': ['bed', 'chair', 'couch', 'table', 'wardrobe'],
        'insects': ['bee', 'beetle', 'butterfly', 'caterpillar', 'cockroach'],
        'large carnivores': ['bear', 'leopard', 'lion', 'tiger', 'wolf'],
        'large man-made outdoor things': ['bridge', 'castle', 'house', 'road', 'skyscraper'],
        'large natural outdoor scenes': ['cloud', 'forest', 'mountain', 'plain', 'sea'],
        'large omnivores and herbivores': ['camel', 'cattle', 'chimpanzee', 'elephant', 'kangaroo'],
        'medium-sized mammals': ['fox', 'porcupine', 'possum', 'raccoon', 'skunk'],
        'non-insect invertebrates': ['crab', 'lobster', 'snail', 'spider', 'worm'],
        'people': ['baby', 'boy', 'girl', 'man', 'woman'],
        'reptiles': ['crocodile', 'dinosaur', 'lizard', 'snake', 'turtle'],
        'small mammals': ['hamster', 'mouse', 'rabbit', 'shrew', 'squirrel'],
        'trees': ['maple_tree', 'oak_tree', 'palm_tree', 'pine_tree', 'willow_tree'],
        'vehicles 1': ['bicycle', 'bus', 'motorcycle', 'pickup_truck', 'train'],
        'vehicles 2': ['lawn_mower', 'rocket', 'streetcar', 'tank', 'tractor'],
    }
    known_fine_names = set(cifar100.classes)

    graph = nx.DiGraph()
    root = 120
    graph.add_node(root, label="root")

    coarse_nodes, fine_nodes = [], []
    next_id = root - 1
    for parent_name, child_names in coarse_to_fine.items():
        parent_node = next_id
        graph.add_node(parent_node, label=parent_name)
        graph.add_edge(root, parent_node)
        coarse_nodes.append(parent_node)
        next_id -= 1
        for child_name in child_names:
            if child_name not in known_fine_names:
                print(f"Error: '{child_name}' not found in fine_labels")
                continue
            graph.add_node(next_id, label=child_name)
            graph.add_edge(parent_node, next_id)
            fine_nodes.append(next_id)
            next_id -= 1
    return graph, fine_nodes, coarse_nodes

cifar100_graph, l_finer, l_hyper = build_cifar100_hierarchy_root()

# Print the nodes and edges to verify the hierarchy
print("Nodes:", cifar100_graph.nodes(data=True))
print("Edges:", cifar100_graph.edges(data=True))

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:03<00:00, 53415480.47it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Nodes: [(120, {'label': 'root'}), (119, {'label': 'aquatic mammals'}), (118, {'label': 'beaver'}), (117, {'label': 'dolphin'}), (116, {'label': 'otter'}), (115, {'label': 'seal'}), (114, {'label': 'whale'}), (113, {'label': 'fish'}), (112, {'label': 'aquarium_fish'}), (111, {'label': 'flatfish'}), (110, {'label': 'ray'}), (109, {'label': 'shark'}), (108, {'label': 'trout'}), (107, {'label': 'flowers'}), (106, {'label': 'orchid'}), (105, {'label': 'poppy'}), (104, {'label': 'rose'}), (103, {'label': 'sunflower'}), (102, {'label': 'tulip'}), (101, {'label': 'food containers'}), (100, {'label': 'bottle'}), (99, {'label': 'bowl'}), (98, {'label': 'can'}), (97, {'label': 'cup'}), (96, {'label': 'plate'}), (95, {'label': 'fruit and vegetables'}), (94, {'label': 'apple'}), (93, {'label': 'mushroom'}), (92, {'label': 'orange'}), (91, {'label': 'pear'}), (90, {'label': 'sweet_pepper'}), (89, {'label': 'household electrical devices'}), (88, {'l

In [None]:
# Load pre-trained embedding weights from disk.
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only runtime;
# the tensors are used on the CPU here anyway.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
model_loaded = torch.load('/content/DistortionEmbedding_weights_64.pth', map_location='cpu')
embeddings = model_loaded["weight"].data.cpu()
# Prototype rows follow the node-id order of l_hyper / l_finer.
hyper_embedings = embeddings[l_hyper]
finer_embedings = embeddings[l_finer]

In [None]:
import gspread
from google.colab import auth
from google.auth import default
import gspread
import pandas as pd
# Authenticate the Colab user and build a gspread client from the default credentials.
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

# Open the spreadsheet that receives the results. "results_masters" is the
# author's own sheet; change the name to a sheet your account can access.
worksheet = gc.open("results_masters").sheet1

In [None]:
# Re-derive the prototype matrices from whichever `embeddings` tensor is
# currently in the kernel (trained above or loaded from the checkpoint).
# embeddings = model_loaded["weight"].data.cpu()
hyper_embedings = embeddings[l_hyper]
finer_embedings = embeddings[l_finer]

In [None]:
import random
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
import networkx as nx
from geoopt.manifolds import PoincareBall
import hypll.nn as hnn
from hypll.optim import RiemannianSGD
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR100
from torchvision.transforms import ToTensor
from geoopt import ManifoldParameter

# Initialize the Poincare ball with c=1.0.
# PoincareBall here is the geoopt class imported in this cell, so this rebinds
# `poincare_ball`, which earlier cells bound to a hypll manifold.
poincare_ball = PoincareBall(c=1.0)
# `device` is not redefined here; the value set in an earlier cell is reused.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#


In [None]:

# Initialize the Poincare ball with c=1.0 (same statement as the previous cell).
poincare_ball = PoincareBall(c=1.0)
# `device` is not redefined here; the value set in an earlier cell is reused.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
# Hyper-parameter grid for the sweep below.
# The original list started with a stray comma (a SyntaxError), presumably left
# over from a deleted first value; only the two remaining rates are kept.
lr_ls = [0.3, 0.03]  # Adam learning rates
tau_ls = [10, 100]  # scale applied to the negative distances used as logits

In [None]:
class CNN(nn.Module):
    """Small CNN that classifies by distance to fixed prototypes on a Poincare ball.

    The network maps an image to a point on ``ball`` and returns, for both the
    fine and the hyperclass prototypes, the negative distance scaled by ``tau``
    so the values can be used directly as logits.

    Args:
        tau: scale applied to the negative distances.
        hyperclass_prototypes: ``(num_hyperclasses, dim)`` tensor of prototype points.
        finer_prototypes: ``(num_fine_classes, dim)`` tensor of prototype points.
        ball: manifold object providing ``expmap0`` and ``dist``.
    """

    def __init__(self, tau, hyperclass_prototypes, finer_prototypes, ball):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
        # Two 2x2 poolings leave an 8x8 map with 64 channels for 32x32 inputs.
        self.fc1 = nn.Linear(64 * 8 * 8, 64)
        # The embedding must live in the same space as the prototypes.
        self.fc2 = nn.Linear(64, finer_prototypes.shape[-1])

        self.ball = ball
        self.tau = tau

        # Prototypes are fixed (not trained); keep them on the compute device.
        self.hyperclass_prototypes = hyperclass_prototypes.to(device)
        self.finer_prototypes = finer_prototypes.to(device)

    def forward(self, x):
        """Return ``(fine_logits, hyper_logits, embeddings_on_ball)`` for a batch of images."""
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = nn.functional.relu(self.conv3(x))
        x = x.view(x.size(0), -1)  # Flatten the tensor while preserving the batch size
        x_mid = nn.functional.relu(self.fc1(x))
        embeddings = self.fc2(x_mid)
        embeddings_hyp = self.ball.expmap0(embeddings)  # Map the embeddings onto the Poincare ball

        # Broadcast (batch, 1, dim) against (1, classes, dim) -> (batch, classes).
        hyper_dists = -self.ball.dist(embeddings_hyp.unsqueeze(1), self.hyperclass_prototypes.unsqueeze(0)) * self.tau
        finer_dists = -self.ball.dist(embeddings_hyp.unsqueeze(1), self.finer_prototypes.unsqueeze(0)) * self.tau
        return finer_dists, hyper_dists, embeddings_hyp


def _fine_label_to_prototype_row(graph, fine_nodes, class_names):
    """Map each dataset fine-label index to the row of its prototype.

    The prototype matrix is ordered like ``fine_nodes`` (hierarchy construction
    order), while dataset labels index ``class_names``. Node names are read
    from the ``label`` attribute, falling back to ``name``, because the two
    hierarchy builders in this notebook use different attribute keys.

    Returns:
        ``torch.long`` tensor ``rows`` with ``rows[dataset_label] == prototype_row``.

    Raises:
        KeyError: if a dataset class has no node in ``fine_nodes``.
    """
    row_by_name = {}
    for row, node in enumerate(fine_nodes):
        attrs = graph.nodes[node]
        row_by_name[attrs.get("label", attrs.get("name"))] = row
    return torch.tensor([row_by_name[name] for name in class_names], dtype=torch.long)


# Fine prototypes are laid out coarse-class by coarse-class, five per group, so
# `row // 5` is the hyperclass row. That only holds in prototype-row space, not
# in the dataset's own label numbering.
if len(l_finer) != 5 * len(l_hyper):
    raise ValueError("expected exactly five fine classes per hyperclass")
fine_label_to_row = _fine_label_to_prototype_row(
    cifar100_graph, l_finer, train_dataset.classes
).to(device)

for lr_var in lr_ls:
    for tau_var in tau_ls:
        # Create an instance of the model for this (lr, tau) configuration
        model = CNN(
            tau=tau_var,
            hyperclass_prototypes=hyper_embedings,
            finer_prototypes=finer_embedings,
            ball=poincare_ball,
        )

        # Define the loss functions and optimizer
        criterion_fine = nn.CrossEntropyLoss()
        criterion_hyper = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr_var)

        # Train the model
        num_epochs = 100
        model.to(device)
        print("training")
        for epoch in range(num_epochs):
            model.train()
            print(epoch)
            for images, labels in train_loader:
                images = images.to(device)
                # Translate dataset labels into prototype rows before using them.
                labels = fine_label_to_row[labels.to(device)]
                hyper_labels = labels // 5  # Hyperclass row (0-19) of each fine row
                optimizer.zero_grad()
                finer_dists, hyper_dists, embeddings_hyp = model(images)
                loss_fine = criterion_fine(finer_dists, labels)
                loss_hyper = criterion_hyper(hyper_dists, hyper_labels)  # Negative distances act as logits
                loss = loss_fine + loss_hyper  # Combined loss
                loss.backward()
                optimizer.step()

        # Get the model predictions
        print("eval")
        model.eval()
        predictions_fine = []
        predictions_hyper = []
        true_labels = []
        with torch.no_grad():
            for images, targets in test_loader:
                images = images.to(device)
                targets = fine_label_to_row[targets.to(device)]
                fine_output, hyper_dists, embeddings_hyp = model(images)
                # Largest logit == smallest distance to a prototype.
                pred_fine = fine_output.argmax(dim=1)
                pred_hyper = hyper_dists.argmax(dim=1)
                predictions_fine.append(pred_fine.cpu())
                predictions_hyper.append(pred_hyper.cpu())
                true_labels.append(targets.cpu())

        predictions_fine = torch.cat(predictions_fine, dim=0)
        predictions_hyper = torch.cat(predictions_hyper, dim=0)
        true_labels = torch.cat(true_labels, dim=0)

        # Calculate accuracy for fine-grained classification
        accuracy_fine = (predictions_fine == true_labels).float().mean()
        print(f'Fine-grained Classification Accuracy: {accuracy_fine:.4f}')

        # Calculate accuracy for hyperclass classification
        accuracy_hyper = (predictions_hyper == (true_labels // 5)).float().mean()
        print(f'Hyperclass Classification Accuracy: {accuracy_hyper:.4f}')
        worksheet.append_row([tau_var, lr_var, float(accuracy_hyper), float(accuracy_fine)])

### Benchmark

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR100
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

# Set random seed for reproducibility (re-seeds torch at the start of this benchmark cell)
torch.manual_seed(42)

# Load the CIFAR-100 dataset (images converted to tensors, no further preprocessing)
train_dataset = CIFAR100(root='./data', train=True, download=True, transform=ToTensor())
test_dataset = CIFAR100(root='./data', train=False, download=True, transform=ToTensor())

# Create data loaders; only the training set is shuffled
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the model architecture
class CNN(nn.Module):
    """Baseline CNN with two linear heads: 100 fine classes and 20 hyperclasses."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
        # Two 2x2 poolings leave an 8x8 map with 64 channels for 32x32 inputs.
        self.fc1 = nn.Linear(64 * 8 * 8, 64)
        self.fc2 = nn.Linear(64, 100)  # fine-class head
        self.fc3 = nn.Linear(64, 20)  # hyperclass head

    def forward(self, x):
        """Return ``(fine_logits, hyper_logits)`` for a batch of images."""
        features = self.pool(torch.relu(self.conv1(x)))
        features = self.pool(torch.relu(self.conv2(features)))
        features = torch.relu(self.conv3(features))
        flat = features.reshape(features.shape[0], -1)  # keep the batch dimension
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden), self.fc3(hidden)

# Create an instance of the model
model = CNN()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Train the model
num_epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print("training")
for epoch in range(num_epochs):
    model.train()
    print(epoch)
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        output_fine, output_hyper = model(images)
        # NOTE(review): `// 5` treats every run of five consecutive fine-label
        # indices as one hyperclass -- confirm this matches the dataset's
        # actual fine-to-coarse assignment.
        hyper_labels = labels // 5
        loss_fine = criterion(output_fine, labels)
        loss_hyper = criterion(output_hyper, hyper_labels)
        loss = loss_fine + loss_hyper  # both heads are trained jointly
        loss.backward()
        optimizer.step()

# Get the model predictions
print("eval")
model.eval()
predictions_fine = []
predictions_hyper = []
true_labels = []
with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(device)
        outputs, output_hyper = model(images)
        predictions_fine.append(outputs.argmax(dim=1).cpu())
        predictions_hyper.append(output_hyper.argmax(dim=1).cpu())
        true_labels.append(targets.cpu())  # Fine-grained targets (0-99)

predictions_fine = torch.cat(predictions_fine, dim=0)
predictions_hyper = torch.cat(predictions_hyper, dim=0)
true_labels = torch.cat(true_labels, dim=0)
true_hyper_labels = true_labels // 5  # same grouping as used for training
print("2 Outputs")
# Calculate accuracy for fine-grained classification
accuracy_fine = (predictions_fine == true_labels).float().mean()
print(f'Fine-grained Classification Accuracy: {accuracy_fine:.4f}')

# Calculate accuracy for hyperclass classification
accuracy_hyper = (predictions_hyper == true_hyper_labels).float().mean()
print(f'Hyperclass Classification Accuracy: {accuracy_hyper:.4f}')


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR100
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

# Set random seed for reproducibility (re-seeds torch at the start of this benchmark cell)
torch.manual_seed(42)

# Load the CIFAR-100 dataset (images converted to tensors, no further preprocessing)
train_dataset = CIFAR100(root='./data', train=True, download=True, transform=ToTensor())
test_dataset = CIFAR100(root='./data', train=False, download=True, transform=ToTensor())

# Create data loaders; only the training set is shuffled
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the model architecture
class CNN(nn.Module):
    """Baseline CNN with a single linear head over the 20 hyperclasses."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
        # Two 2x2 poolings leave an 8x8 map with 64 channels for 32x32 inputs.
        self.fc1 = nn.Linear(64 * 8 * 8, 64)
        self.fc3 = nn.Linear(64, 20)  # hyperclass head (no fine-class head in this variant)

    def forward(self, x):
        """Return the hyperclass logits for a batch of images."""
        features = self.pool(torch.relu(self.conv1(x)))
        features = self.pool(torch.relu(self.conv2(features)))
        features = torch.relu(self.conv3(features))
        flat = features.reshape(features.shape[0], -1)  # keep the batch dimension
        hidden = torch.relu(self.fc1(flat))
        return self.fc3(hidden)

# Create an instance of the model
model = CNN()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Train the model
num_epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print("training")
for epoch in range(num_epochs):
    model.train()
    print(epoch)
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        output_hyper = model(images)
        # NOTE(review): `// 5` treats every run of five consecutive fine-label
        # indices as one hyperclass -- confirm this matches the dataset's
        # actual fine-to-coarse assignment.
        hyper_labels = labels // 5
        loss = criterion(output_hyper, hyper_labels)
        loss.backward()
        optimizer.step()

# Get the model predictions
print("eval")
model.eval()
predictions_hyper = []
true_labels = []
with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(device)
        output_hyper = model(images)
        predictions_hyper.append(output_hyper.argmax(dim=1).cpu())
        true_labels.append(targets.cpu())  # Fine-grained targets (0-99)

predictions_hyper = torch.cat(predictions_hyper, dim=0)
true_labels = torch.cat(true_labels, dim=0)
true_hyper_labels = true_labels // 5  # same grouping as used for training
print("hyper only Outputs")
# This variant has no fine-class head; 0 is a placeholder so the printed
# report keeps the same two lines as the other benchmark cells.
accuracy_fine = 0
print(f'Fine-grained Classification Accuracy: {accuracy_fine:.4f}')

# Calculate accuracy for hyperclass classification
accuracy_hyper = (predictions_hyper == true_hyper_labels).float().mean()
print(f'Hyperclass Classification Accuracy: {accuracy_hyper:.4f}')


Files already downloaded and verified
Files already downloaded and verified
training
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
eval
hyper only Outputs
Fine-grained Classification Accuracy: 0.0000
Hyperclass Classification Accuracy: 0.2690


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR100
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

# Set random seed for reproducibility (re-seeds torch at the start of this benchmark cell)
torch.manual_seed(42)

# Load the CIFAR-100 dataset (images converted to tensors, no further preprocessing)
train_dataset = CIFAR100(root='./data', train=True, download=True, transform=ToTensor())
test_dataset = CIFAR100(root='./data', train=False, download=True, transform=ToTensor())

# Create data loaders; only the training set is shuffled
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the model architecture
class CNN(nn.Module):
    """Small convolutional baseline that predicts CIFAR-100 fine-grained classes.

    Three 3x3 convolutions (the first two followed by 2x2 max pooling) feed a
    64-unit hidden layer and a 100-way output layer. The training loop in this
    cell optimises cross-entropy against the fine labels (0-99), so the output
    layer must provide one logit per fine class.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
        # Two 2x2 poolings shrink a 32x32 input to 8x8 feature maps with 64 channels.
        self.fc1 = nn.Linear(64 * 8 * 8, 64)
        # One logit per fine-grained class. With only 20 outputs the loss would
        # reject every target index >= 20.
        self.fc2 = nn.Linear(64, 100)

    def forward(self, x):
        """Return the raw class logits for a batch of images.

        Args:
            x: image batch of shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, 100) holding unnormalised class scores.
        """
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = nn.functional.relu(self.conv3(x))
        x = x.view(x.size(0), -1)  # Flatten the tensor while preserving the batch size
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Fine-label baseline: train the CNN defined above with cross-entropy on the
# fine labels, then report fine-grained and hyperclass accuracy on the test set.

# Create an instance of the model
model = CNN()

# Define the loss function and optimizer (Adam with its default settings)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Train the model
num_epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print("training")
for epoch in range(num_epochs):
    model.train()
    print(epoch)  # progress marker: one line per epoch
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        # Targets are the fine labels, so the model must emit one logit per fine class.
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# Get the model predictions
print("eval")
model.eval()
predictions_fine = []
predictions_hyper = []
true_labels = []
with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(device)
        targets_fine = targets.to(device)  # Fine-grained targets (0-99)
        # NOTE(review): targets_hyper is computed but never read below.
        targets_hyper = targets // 5  # Convert labels to hyperclass labels (0-19) for evaluation
        outputs = model(images)
        pred_fine = outputs.argmax(dim=1)
        # NOTE(review): `// 5` treats every run of five consecutive fine-label
        # indices as one hyperclass - confirm this matches the dataset's actual
        # fine-to-coarse mapping.
        pred_hyper = (pred_fine // 5)  # Convert fine-grained predictions to hyperclass predictions
        predictions_fine.append(pred_fine.cpu())
        predictions_hyper.append(pred_hyper.cpu())
        true_labels.append(targets_fine.cpu())

# Concatenate the per-batch results into one tensor per quantity.
predictions_fine = torch.cat(predictions_fine, dim=0)
predictions_hyper = torch.cat(predictions_hyper, dim=0)
true_labels = torch.cat(true_labels, dim=0)
true_hyper_labels = true_labels // 5
print("only fine outputs")
# Calculate accuracy for fine-grained classification
accuracy_fine = (predictions_fine == true_labels).float().mean()
print(f'Fine-grained Classification Accuracy: {accuracy_fine:.4f}')

# Calculate accuracy for hyperclass classification (derived from the fine predictions)
accuracy_hyper = (predictions_hyper == true_hyper_labels).float().mean()
print(f'Hyperclass Classification Accuracy: {accuracy_hyper:.4f}')

Files already downloaded and verified
Files already downloaded and verified
training
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
eval
only fine
Fine-grained Classification Accuracy: 0.2951
Hyperclass Classification Accuracy: 0.3236


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR100
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

# Seed PyTorch so weight initialisation and loader shuffling are repeatable.
torch.manual_seed(42)

# CIFAR-100 train split first, then the test split; both are fetched into
# ./data when missing and converted to tensors by ToTensor().
train_dataset, test_dataset = (
    CIFAR100(root='./data', train=is_train, download=True, transform=ToTensor())
    for is_train in (True, False)
)

# Mini-batches of 64 images; only the training loader is shuffled.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the model architecture
class CNN(nn.Module):
    """Small convolutional baseline with a 20-way output layer.

    The training loop in this cell feeds it hyperclass targets (`labels // 5`),
    so each of the 20 logits scores one hyperclass.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
        # Two 2x2 poolings shrink a 32x32 input to 8x8 feature maps with 64 channels.
        self.fc1 = nn.Linear(in_features=64 * 8 * 8, out_features=64)
        # Output layer: 20 logits.
        self.fc2 = nn.Linear(in_features=64, out_features=20)

    def forward(self, x):
        """Map an image batch of shape (batch, 3, 32, 32) to (batch, 20) logits."""
        relu = nn.functional.relu
        features = self.pool(relu(self.conv1(x)))
        features = self.pool(relu(self.conv2(features)))
        features = relu(self.conv3(features))
        # Collapse channel and spatial axes, keeping the batch axis.
        flat = features.view(features.size(0), -1)
        hidden = relu(self.fc1(flat))
        return self.fc2(hidden)

# Hyperclass-only baseline: train the 20-way CNN defined above on hyperclass
# targets and report hyperclass accuracy on the test set.

# Create an instance of the model
model = CNN()

# Define the loss function and optimizer (Adam with its default settings)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Train the model
num_epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print("training")
for epoch in range(num_epochs):
    model.train()
    print(epoch)  # progress marker: one line per epoch
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        # NOTE(review): `// 5` treats every run of five consecutive fine-label
        # indices as one hyperclass - confirm this matches the dataset's actual
        # fine-to-coarse mapping.
        labels_hyper = labels // 5
        loss = criterion(outputs, labels_hyper)
        loss.backward()
        optimizer.step()

# Get the model predictions
print("eval")
model.eval()
predictions_hyper = []
true_labels = []
with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(device)
        targets_fine = targets.to(device)  # Fine-grained targets (0-99)
        outputs = model(images)
        # The model's 20 logits are hyperclass scores, so argmax is the prediction.
        pred_hyper = outputs.argmax(dim=1)
        predictions_hyper.append(pred_hyper.cpu())
        true_labels.append(targets_fine.cpu())

predictions_hyper = torch.cat(predictions_hyper, dim=0)
true_labels = torch.cat(true_labels, dim=0)
true_hyper_labels = true_labels // 5
# This run trains and evaluates on hyperclass labels only; the banner used to
# say "only fine", which attributed the numbers to the wrong experiment.
print("hyper only Outputs")
# No fine-grained predictions exist for this model; 0 is printed as a placeholder.
print(f'Fine-grained Classification Accuracy: {0:.4f}')

# Calculate accuracy for hyperclass classification
accuracy_hyper = (predictions_hyper == true_hyper_labels).float().mean()
print(f'Hyperclass Classification Accuracy: {accuracy_hyper:.4f}')

Files already downloaded and verified
Files already downloaded and verified
training
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
eval
only fine
Fine-grained Classification Accuracy: 0.0000
Hyperclass Classification Accuracy: 0.2668


### Poincaré embeddings:

In [None]:
import random
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
import networkx as nx
from hypll.manifolds.poincare_ball import Curvature, PoincareBall
import hypll.nn as hnn
from hypll.optim import RiemannianSGD
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR100
from torchvision.transforms import ToTensor
from geoopt import ManifoldParameter
#

# Set random seeds for reproducibility. Python's `random` module is seeded too
# because the negative sampling in CIFAR100EmbeddingDataset relies on
# random.sample, which torch.manual_seed does not control.
random.seed(42)
torch.manual_seed(42)

# Set the device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1. Load the CIFAR-100 dataset and build a hierarchy
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

def build_cifar100_hierarchy():
    """Build the two-level CIFAR-100 class hierarchy as a directed graph.

    Node ids are consecutive integers handed out in insertion order: each
    superclass node is immediately followed by the nodes of its fine classes.
    Every superclass node gets an edge to each of its own fine-class nodes.

    Returns:
        tuple: ``(graph, fine_nodes, coarse_nodes)`` where ``graph`` is an
        ``nx.DiGraph`` whose nodes carry a ``name`` attribute, ``fine_nodes``
        lists the node ids of the fine classes and ``coarse_nodes`` lists the
        node ids of the superclasses, both in insertion order.
    """
    cifar100 = datasets.CIFAR100(root='./data', train=True, download=True)
    coarse_to_fine = {
        'aquatic mammals': ['beaver', 'dolphin', 'otter', 'seal', 'whale'],
        'fish': ['aquarium_fish', 'flatfish', 'ray', 'shark', 'trout'],
        'flowers': ['orchid', 'poppy', 'rose', 'sunflower', 'tulip'],
        'food containers': ['bottle', 'bowl', 'can', 'cup', 'plate'],
        'fruit and vegetables': ['apple', 'mushroom', 'orange', 'pear', 'sweet_pepper'],
        'household electrical devices': ['clock', 'keyboard', 'lamp', 'telephone', 'television'],
        'household furniture': ['bed', 'chair', 'couch', 'table', 'wardrobe'],
        'insects': ['bee', 'beetle', 'butterfly', 'caterpillar', 'cockroach'],
        'large carnivores': ['bear', 'leopard', 'lion', 'tiger', 'wolf'],
        'large man-made outdoor things': ['bridge', 'castle', 'house', 'road', 'skyscraper'],
        'large natural outdoor scenes': ['cloud', 'forest', 'mountain', 'plain', 'sea'],
        'large omnivores and herbivores': ['camel', 'cattle', 'chimpanzee', 'elephant', 'kangaroo'],
        'medium-sized mammals': ['fox', 'porcupine', 'possum', 'raccoon', 'skunk'],
        'non-insect invertebrates': ['crab', 'lobster', 'snail', 'spider', 'worm'],
        'people': ['baby', 'boy', 'girl', 'man', 'woman'],
        'reptiles': ['crocodile', 'dinosaur', 'lizard', 'snake', 'turtle'],
        'small mammals': ['hamster', 'mouse', 'rabbit', 'shrew', 'squirrel'],
        'trees': ['maple_tree', 'oak_tree', 'palm_tree', 'pine_tree', 'willow_tree'],
        'vehicles 1': ['bicycle', 'bus', 'motorcycle', 'pickup_truck', 'train'],
        'vehicles 2': ['lawn_mower', 'rocket', 'streetcar', 'tank', 'tractor'],
    }
    # Only used to validate that every listed fine class exists in the dataset.
    # NOTE(review): node ids follow the order of `coarse_to_fine`, not the
    # dataset's class indices - confirm that code indexing prototypes by
    # dataset label expects this ordering.
    fine_labels = {label: i for i, label in enumerate(cifar100.classes)}
    graph = nx.DiGraph()

    idx = 0
    l1, l2 = [], []
    for parent_name, child_names in coarse_to_fine.items():
        # Remember the node id given to this superclass: the edges below must
        # start from it, not from the superclass's position in the dict
        # (which names an unrelated node for every superclass after the first).
        parent_node = idx
        graph.add_node(parent_node, name=parent_name)
        l1.append(parent_node)
        idx += 1
        for child_name in child_names:
            if child_name not in fine_labels:
                print(f"Error: '{child_name}' not found in fine_labels")
                continue
            graph.add_node(idx, name=child_name)
            graph.add_edge(parent_node, idx)
            l2.append(idx)
            idx += 1
    return graph, l2, l1

# Build the hierarchy once. The function returns the graph, the node ids of the
# fine classes and the node ids of the superclasses, in that order.
cifar100_graph, l_finer, l_hyper = build_cifar100_hierarchy()

# 2. Create a dataset containing the graph from which we can sample
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

class CIFAR100EmbeddingDataset(Dataset):
    """Edge dataset over the hierarchy graph with sampled negative edges.

    Each item corresponds to one true edge of the graph and bundles it with ten
    corrupted edges: up to five that keep the source node and swap the target,
    and the remainder that keep the target node and swap the source.
    """

    def __init__(self, graph):
        super().__init__()
        self.graph = graph
        self.edges_list = list(graph.edges())

    def __len__(self):
        return len(self.edges_list)

    def __getitem__(self, idx):
        """Return ``(edges, edge_label_targets)`` for the idx-th true edge.

        ``edges`` stacks the true edge first, followed by the corrupted-target
        edges and then the corrupted-source edges. ``edge_label_targets`` is a
        boolean vector of length 11 that is True only at position 0.
        """
        source, target = self.edges_list[idx]

        # Candidate replacement targets: every node that is neither the source
        # itself nor reachable from it.
        target_candidates = list(
            self.graph.nodes() - nx.descendants(self.graph, source) - {source}
        )
        n_corrupt_targets = min(5, len(target_candidates))
        corrupt_targets = random.sample(target_candidates, n_corrupt_targets)

        # Candidate replacement sources: every node that is neither the target
        # itself nor one of its ancestors.
        source_candidates = list(
            self.graph.nodes() - nx.ancestors(self.graph, target) - {target}
        )
        # Top up so that there are always ten negatives in total.
        n_corrupt_sources = 10 - n_corrupt_targets
        corrupt_sources = random.sample(source_candidates, n_corrupt_sources)

        rows = [[source, target]]
        rows.extend([source, node] for node in corrupt_targets)
        rows.extend([node, target] for node in corrupt_sources)
        edges = torch.tensor(rows)

        edge_label_targets = torch.zeros(11, dtype=torch.bool)
        edge_label_targets[0] = True
        return edges, edge_label_targets

# Wrap the hierarchy graph in the edge dataset. Each item is one true edge plus
# ten sampled negatives, and the loader groups ten such items per batch.
dataset = CIFAR100EmbeddingDataset(cifar100_graph)
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)

# 3. Initialize the Poincare ball on which the embeddings will be trained
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# hypll's PoincareBall, constructed with a Curvature of 1.0.
poincare_ball = PoincareBall(Curvature(1.0))

# 4. Define the Poincare embedding model
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

class PoincareEmbedding(hnn.HEmbedding):
    """Node-embedding table whose forward pass scores edges by manifold distance."""

    def __init__(self, num_embeddings, embedding_dim, manifold):
        super().__init__(num_embeddings, embedding_dim, manifold)
        # Place the embedding table on the globally selected device.
        self.to(device)

    def forward(self, edges):
        """Return the distance between the two endpoints of every edge.

        ``edges`` holds node-id pairs in its last axis; the lookup adds an
        embedding axis, after which index 0 / 1 of the pair axis select the
        source / target embeddings.
        """
        node_embeddings = super().forward(edges)
        sources = node_embeddings[:, :, 0, :]
        targets = node_embeddings[:, :, 1, :]
        return self.manifold.dist(x=sources, y=targets)

# One 64-dimensional embedding per node of the hierarchy graph, living on the
# Poincare ball created above.
model = PoincareEmbedding(
    num_embeddings=len(cifar100_graph.nodes()),
    embedding_dim=64,
    manifold=poincare_ball,
)

# 5. Define the Poincare embedding loss function
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

def poincare_embeddings_loss(dists, targets):
    """Negative log-likelihood of the true edges under a softmax over -distance.

    Args:
        dists: distances of candidate edges; the last axis enumerates the
            candidates that compete with each other.
        targets: boolean tensor of the same shape, True where the candidate is
            a true edge.

    Returns:
        Scalar tensor: the mean over all leading axes of
        ``-log(sum_true exp(-d) / sum_all exp(-d))``.
    """
    weights = torch.exp(-dists)
    # Mass of the true edges only; false candidates contribute zero.
    positive_mass = torch.where(condition=targets, input=weights, other=0).sum(dim=-1)
    total_mass = weights.sum(dim=-1)
    return -torch.log(positive_mass / total_mass).mean()

# 6. Perform a few "burn-in" training epochs with reduced learning rate
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

def _run_embedding_epochs(optimizer, num_epochs, label):
    """Train the global embedding `model` for `num_epochs` passes over `dataloader`.

    After every epoch the mean batch loss is printed as
    "<label> <epoch> loss: <value>".

    Args:
        optimizer: optimizer already bound to `model.parameters()`.
        num_epochs: number of full passes over `dataloader`.
        label: prefix of the per-epoch progress line.
    """
    for epoch in range(num_epochs):
        total_loss = 0.0
        for edges, edge_label_targets in dataloader:
            edges = edges.to(device)  # Move the data to the device
            edge_label_targets = edge_label_targets.to(device)  # Move the labels to the device
            optimizer.zero_grad()
            dists = model(edges)
            loss = poincare_embeddings_loss(dists=dists, targets=edge_label_targets)
            loss.backward()
            optimizer.step()
            # Accumulate a plain float: summing the loss tensors themselves
            # would chain every batch's autograd history into the running total.
            total_loss += loss.item()

        print(f"{label} {epoch} loss: {total_loss / len(dataloader)}")


# Burn-in: ten epochs at a tenth of the main learning rate.
optimizer = RiemannianSGD(
    params=model.parameters(),
    lr=0.3 / 10,
)
_run_embedding_epochs(optimizer, num_epochs=10, label="Burn-in epoch")

# Main training run at the full learning rate.
optimizer = RiemannianSGD(
    params=model.parameters(),
    lr=0.3,
)
_run_embedding_epochs(optimizer, num_epochs=300, label="Epoch")

# You have now trained your own Poincare Embeddings on CIFAR-100!

# Copy the trained embedding table to the CPU and split it into superclass and
# fine-class rows, using the node-id lists returned by the hierarchy builder.
embeddings = model.weight.data.cpu()
hyper_embedings = embeddings[l_hyper]
finer_embedings = embeddings[l_finer]

# Load the CIFAR-100 dataset (images as tensors) for the classifier cells below
train_dataset = CIFAR100(root='./data', train=True, download=True, transform=ToTensor())
test_dataset = CIFAR100(root='./data', train=False, download=True, transform=ToTensor())

# Create data loaders: batches of 64, only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

Files already downloaded and verified
Burn-in epoch 0 loss: 2.4113593101501465
Burn-in epoch 1 loss: 2.4108362197875977
Burn-in epoch 2 loss: 2.4108407497406006
Burn-in epoch 3 loss: 2.407682180404663
Burn-in epoch 4 loss: 2.3963711261749268
Burn-in epoch 5 loss: 2.401576519012451
Burn-in epoch 6 loss: 2.3986127376556396
Burn-in epoch 7 loss: 2.400952100753784
Burn-in epoch 8 loss: 2.400409698486328
Burn-in epoch 9 loss: 2.3857576847076416
Epoch 0 loss: 2.3839635848999023
Epoch 1 loss: 2.37147855758667
Epoch 2 loss: 2.35121750831604
Epoch 3 loss: 2.339600086212158
Epoch 4 loss: 2.3328092098236084
Epoch 5 loss: 2.309572458267212
Epoch 6 loss: 2.298741102218628
Epoch 7 loss: 2.294097423553467
Epoch 8 loss: 2.283087968826294
Epoch 9 loss: 2.265986919403076
Epoch 10 loss: 2.260411262512207
Epoch 11 loss: 2.2608799934387207
Epoch 12 loss: 2.2517738342285156
Epoch 13 loss: 2.258533477783203
Epoch 14 loss: 2.2428665161132812
Epoch 15 loss: 2.2367608547210693
Epoch 16 loss: 2.2381465435028076


In [None]:
import random
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
import networkx as nx
from geoopt.manifolds import PoincareBall
import hypll.nn as hnn
from hypll.optim import RiemannianSGD
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR100
from torchvision.transforms import ToTensor
from geoopt import ManifoldParameter

# Initializing the Poincare Ball with curvature 1.0.
# This rebinds `poincare_ball` to geoopt's PoincareBall (imported in this cell),
# replacing the hypll ball that was used to train the embeddings.
poincare_ball = PoincareBall(c=1.0)
# `device` is not redefined here; the classifier cells reuse the value set earlier.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#


In [None]:
# Hyper-parameter sweep (one learning rate, one temperature) for a CNN that
# embeds images on the Poincare ball and classifies them by distance to fixed
# class prototypes. This variant trains on the fine-class loss only.
lr_ls = [0.00003]
tau_ls =[10]
for lr_var in lr_ls:
  for tau_var in tau_ls:
    # The class is defined inside the loop so that it picks up the current
    # `tau_var` together with the global prototypes, ball and device.
    class CNN(nn.Module):
        """CNN encoder followed by a distance-to-prototype classification head."""

        def __init__(self):
            super(CNN, self).__init__()
            self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
            self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
            self.fc1 = nn.Linear(64 * 8 * 8, 64)  # 64 channels of 8x8 maps after two 2x2 poolings
            self.fc2 = nn.Linear(64, 64)  # The final embedding is 64-dimensional

            self.ball = poincare_ball
            self.tau = tau_var  # scale applied to the negative distances before the softmax

            # Fixed class prototypes taken from the pre-trained embeddings. They are
            # plain tensor attributes (not parameters), so the optimizer never updates them.
            self.hyperclass_prototypes = hyper_embedings.to(device)  # Move to the device
            self.finer_prototypes = finer_embedings.to(device)  # Move to the device

        def forward(self, x):
            """Return ``(finer_dists, hyper_dists)``: negative prototype distances scaled by tau.

            Presumably each result has shape (batch, num_prototypes) - TODO confirm
            against the manifold's `dist` broadcasting rules.
            """
            x = self.pool(nn.functional.relu(self.conv1(x)))
            x = self.pool(nn.functional.relu(self.conv2(x)))
            x = nn.functional.relu(self.conv3(x))
            x = x.view(x.size(0), -1)  # Flatten the tensor while preserving the batch size
            x_mid = nn.functional.relu(self.fc1(x))
            embeddings = self.fc2(x_mid)
            embeddings_hyp = self.ball.expmap0(embeddings)  # Map the Euclidean output onto the Poincare ball

            # Negated so that a smaller distance yields a larger logit.
            hyper_dists = -self.ball.dist(embeddings_hyp.unsqueeze(1), self.hyperclass_prototypes.unsqueeze(0)) * self.tau
            finer_dists = -self.ball.dist(embeddings_hyp.unsqueeze(1), self.finer_prototypes.unsqueeze(0)) * self.tau
            return finer_dists, hyper_dists

    # Create an instance of the model
    model = CNN()

    # Define the loss functions and optimizer
    criterion_fine = nn.CrossEntropyLoss()
    criterion_hyper = nn.CrossEntropyLoss()  # unused in this variant
    optimizer = optim.Adam(model.parameters(), lr=lr_var)

    # Train the model
    num_epochs = 100
    model.to(device)
    print("training")
    for epoch in range(num_epochs):
        model.train()
        print(epoch)  # progress marker: one line per epoch
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            hyper_labels = labels // 5  # Convert labels to hyperclass labels (0-19)
            optimizer.zero_grad()
            finer_dists, hyper_dists  = model(images)
            # NOTE(review): `labels` are dataset class indices, while the rows of
            # finer_prototypes follow the hierarchy builder's ordering - confirm
            # that the two orderings agree.
            loss_fine = criterion_fine(finer_dists, labels)
            # Hyperclass loss is disabled in this variant:
            #loss_hyper = criterion_hyper(hyper_dists, hyper_labels)  # Use negative distances as logits for loss calculation
            loss = loss_fine #+ loss_hyper  # Combined loss
            loss.backward()
            optimizer.step()

    # Get the model predictions
    print("eval")
    model.eval()
    predictions_fine = []
    predictions_hyper = []
    true_labels = []
    with torch.no_grad():
        for images, targets in test_loader:
            images = images.to(device)
            targets = targets.to(device)
            hyper_labels = targets // 5  # Convert labels to hyperclass labels (0-19)
            fine_output, hyper_dists  = model(images)
            # Fine prediction: the prototype with the largest logit, i.e. the nearest one
            pred_fine = fine_output.argmax(dim=1)
            # Hyperclass prediction is derived from the fine prediction, not from hyper_dists
            pred_hyper = pred_fine // 5 # hyper_dists.argmax(dim=1)
            predictions_fine.append(pred_fine.cpu())
            predictions_hyper.append(pred_hyper.cpu())
            true_labels.append(targets.cpu())

    predictions_fine = torch.cat(predictions_fine, dim=0)
    predictions_hyper = torch.cat(predictions_hyper, dim=0)
    true_labels = torch.cat(true_labels, dim=0)

    # Calculate accuracy for fine-grained classification
    accuracy_fine = (predictions_fine == true_labels).float().mean()
    print(f'Fine-grained Classification Accuracy: {accuracy_fine:.4f}')
    print("only fine")
    # Calculate accuracy for hyperclass classification
    accuracy_hyper = (predictions_hyper == (true_labels // 5)).float().mean()
    print(f'Hyperclass Classification Accuracy: {accuracy_hyper:.4f}')
    # NOTE(review): `worksheet` is not defined in this cell - presumably a
    # spreadsheet handle created elsewhere in the notebook; verify it exists
    # before running.
    worksheet.append_row([tau_var,lr_var,float(accuracy_hyper),float(accuracy_fine)])

training
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
eval
Fine-grained Classification Accuracy: 0.3142
Hyperclass Classification Accuracy: 0.3456


In [None]:
# Hyper-parameter sweep (one learning rate, one temperature) for a CNN that
# embeds images on the Poincare ball and classifies them by distance to fixed
# class prototypes. This variant trains and evaluates on the hyperclass loss only.
lr_ls = [0.00003]
tau_ls =[10]
for lr_var in lr_ls:
  for tau_var in tau_ls:
    # The class is defined inside the loop so that it picks up the current
    # `tau_var` together with the global prototypes, ball and device.
    class CNN(nn.Module):
        """CNN encoder followed by a distance-to-prototype classification head."""

        def __init__(self):
            super(CNN, self).__init__()
            self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
            self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
            self.fc1 = nn.Linear(64 * 8 * 8, 64)  # 64 channels of 8x8 maps after two 2x2 poolings
            self.fc2 = nn.Linear(64, 64)  # The final embedding is 64-dimensional

            self.ball = poincare_ball
            self.tau = tau_var  # scale applied to the negative distances before the softmax

            # Fixed class prototypes taken from the pre-trained embeddings. They are
            # plain tensor attributes (not parameters), so the optimizer never updates them.
            self.hyperclass_prototypes = hyper_embedings.to(device)  # Move to the device
            self.finer_prototypes = finer_embedings.to(device)  # Move to the device

        def forward(self, x):
            """Return ``(finer_dists, hyper_dists)``: negative prototype distances scaled by tau.

            Presumably each result has shape (batch, num_prototypes) - TODO confirm
            against the manifold's `dist` broadcasting rules.
            """
            x = self.pool(nn.functional.relu(self.conv1(x)))
            x = self.pool(nn.functional.relu(self.conv2(x)))
            x = nn.functional.relu(self.conv3(x))
            x = x.view(x.size(0), -1)  # Flatten the tensor while preserving the batch size
            x_mid = nn.functional.relu(self.fc1(x))
            embeddings = self.fc2(x_mid)
            embeddings_hyp = self.ball.expmap0(embeddings)  # Map the Euclidean output onto the Poincare ball

            # Negated so that a smaller distance yields a larger logit.
            hyper_dists = -self.ball.dist(embeddings_hyp.unsqueeze(1), self.hyperclass_prototypes.unsqueeze(0)) * self.tau
            finer_dists = -self.ball.dist(embeddings_hyp.unsqueeze(1), self.finer_prototypes.unsqueeze(0)) * self.tau
            return finer_dists, hyper_dists

    # Create an instance of the model
    model = CNN()

    # Define the loss functions and optimizer
    criterion_fine = nn.CrossEntropyLoss()  # unused in this variant
    criterion_hyper = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr_var)

    # Train the model
    num_epochs = 100
    model.to(device)
    print("training")
    for epoch in range(num_epochs):
        model.train()
        print(epoch)  # progress marker: one line per epoch
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            # NOTE(review): `// 5` treats every run of five consecutive fine-label
            # indices as one hyperclass - confirm this matches the prototype ordering.
            hyper_labels = labels // 5  # Convert labels to hyperclass labels (0-19)
            optimizer.zero_grad()
            finer_dists, hyper_dists  = model(images)
            loss_hyper = criterion_hyper(hyper_dists, hyper_labels)  # Use negative distances as logits for loss calculation
            loss = loss_hyper  # Hyperclass loss only; the fine-class loss is not used here
            loss.backward()
            optimizer.step()

    # Get the model predictions
    print("eval")
    model.eval()
    predictions_fine = []
    predictions_hyper = []
    true_labels = []
    with torch.no_grad():
        for images, targets in test_loader:
            images = images.to(device)
            targets = targets.to(device)
            hyper_labels = targets // 5  # Convert labels to hyperclass labels (0-19)
            fine_output, hyper_dists  = model(images)
            # Hyperclass prediction: the prototype with the largest logit, i.e. the nearest one
            pred_hyper =  hyper_dists.argmax(dim=1)
            predictions_hyper.append(pred_hyper.cpu())
            true_labels.append(targets.cpu())
    print("hyper only")
    predictions_hyper = torch.cat(predictions_hyper, dim=0)
    true_labels = torch.cat(true_labels, dim=0)

    # Fine-grained accuracy is not evaluated in this variant. Bind an explicit
    # placeholder so the worksheet row below never picks up a stale
    # `accuracy_fine` left in the kernel by another cell (or fails with a
    # NameError on a fresh kernel).
    accuracy_fine = 0.0
    print(f'Fine-grained Classification Accuracy: {accuracy_fine:.4f}')

    # Calculate accuracy for hyperclass classification
    accuracy_hyper = (predictions_hyper == (true_labels // 5)).float().mean()
    print(f'Hyperclass Classification Accuracy: {accuracy_hyper:.4f}')
    # NOTE(review): `worksheet` is not defined in this cell - presumably a
    # spreadsheet handle created elsewhere in the notebook; verify it exists
    # before running.
    worksheet.append_row([tau_var,lr_var,float(accuracy_hyper),float(accuracy_fine)])

training
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
eval
hyper only
Fine-grained Classification Accuracy: 0.0000
Hyperclass Classification Accuracy: 0.3353


### Distortion embeddings:

In [None]:
import random
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
import networkx as nx
from hypll.manifolds.poincare_ball import Curvature, PoincareBall
import hypll.nn as hnn
from hypll.optim import RiemannianSGD

# 1. Load the CIFAR-100 dataset and build a hierarchy
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# cifar100_graph = build_cifar100_hierarchy()
import networkx as nx
from torchvision import datasets
def build_cifar100_hierarchy_root():
    """Build the CIFAR-100 class hierarchy as a rooted directed graph.

    The root is node 120. Superclass and fine-class nodes receive ids counting
    down from 119 in insertion order (each superclass directly followed by its
    fine classes). Edges run root -> superclass -> fine class, and every node
    carries a ``label`` attribute.

    Returns:
        tuple: ``(graph, fine_positions, coarse_positions)``. The two lists hold
        insertion positions counted upward from 0 (superclasses and fine
        classes share one counter), not the node ids.
    """
    cifar100 = datasets.CIFAR100(root='./data', train=True, download=True)
    coarse_to_fine = {
        'aquatic mammals': ['beaver', 'dolphin', 'otter', 'seal', 'whale'],
        'fish': ['aquarium_fish', 'flatfish', 'ray', 'shark', 'trout'],
        'flowers': ['orchid', 'poppy', 'rose', 'sunflower', 'tulip'],
        'food containers': ['bottle', 'bowl', 'can', 'cup', 'plate'],
        'fruit and vegetables': ['apple', 'mushroom', 'orange', 'pear', 'sweet_pepper'],
        'household electrical devices': ['clock', 'keyboard', 'lamp', 'telephone', 'television'],
        'household furniture': ['bed', 'chair', 'couch', 'table', 'wardrobe'],
        'insects': ['bee', 'beetle', 'butterfly', 'caterpillar', 'cockroach'],
        'large carnivores': ['bear', 'leopard', 'lion', 'tiger', 'wolf'],
        'large man-made outdoor things': ['bridge', 'castle', 'house', 'road', 'skyscraper'],
        'large natural outdoor scenes': ['cloud', 'forest', 'mountain', 'plain', 'sea'],
        'large omnivores and herbivores': ['camel', 'cattle', 'chimpanzee', 'elephant', 'kangaroo'],
        'medium-sized mammals': ['fox', 'porcupine', 'possum', 'raccoon', 'skunk'],
        'non-insect invertebrates': ['crab', 'lobster', 'snail', 'spider', 'worm'],
        'people': ['baby', 'boy', 'girl', 'man', 'woman'],
        'reptiles': ['crocodile', 'dinosaur', 'lizard', 'snake', 'turtle'],
        'small mammals': ['hamster', 'mouse', 'rabbit', 'shrew', 'squirrel'],
        'trees': ['maple_tree', 'oak_tree', 'palm_tree', 'pine_tree', 'willow_tree'],
        'vehicles 1': ['bicycle', 'bus', 'motorcycle', 'pickup_truck', 'train'],
        'vehicles 2': ['lawn_mower', 'rocket', 'streetcar', 'tank', 'tractor'],
    }
    # Used only to check that each listed fine class exists in the dataset.
    known_fine_names = set(cifar100.classes)

    graph = nx.DiGraph()
    root = 120
    graph.add_node(root, label="root")

    next_node = root - 1  # node ids are handed out downward from the root
    position = 0          # insertion counter shared by superclasses and fine classes
    l1, l2 = [], []
    # NOTE(review): the returned lists hold `position` values (0, 1, 2, ...)
    # while the graph's node ids run 119, 118, ... - confirm which ordering the
    # rows of the loaded embedding table follow before indexing with them.
    for parent_name, child_names in coarse_to_fine.items():
        parent_node = next_node
        graph.add_node(parent_node, label=parent_name)
        graph.add_edge(root, parent_node)
        l1.append(position)
        next_node -= 1
        position += 1
        for child_name in child_names:
            if child_name not in known_fine_names:
                print(f"Error: '{child_name}' not found in fine_labels")
                continue
            graph.add_node(next_node, label=child_name)
            graph.add_edge(parent_node, next_node)
            l2.append(position)
            position += 1
            next_node -= 1
    return graph, l2, l1

# Build the rooted hierarchy. This rebinds cifar100_graph, l_finer and l_hyper,
# replacing the values produced by the un-rooted builder earlier in the notebook.
cifar100_graph, l_finer, l_hyper = build_cifar100_hierarchy_root()

# Print the nodes and edges (with their attributes) to verify the hierarchy
print("Nodes:", cifar100_graph.nodes(data=True))
print("Edges:", cifar100_graph.edges(data=True))

Files already downloaded and verified
Nodes: [(120, {'label': 'root'}), (119, {'label': 'aquatic mammals'}), (118, {'label': 'beaver'}), (117, {'label': 'dolphin'}), (116, {'label': 'otter'}), (115, {'label': 'seal'}), (114, {'label': 'whale'}), (113, {'label': 'fish'}), (112, {'label': 'aquarium_fish'}), (111, {'label': 'flatfish'}), (110, {'label': 'ray'}), (109, {'label': 'shark'}), (108, {'label': 'trout'}), (107, {'label': 'flowers'}), (106, {'label': 'orchid'}), (105, {'label': 'poppy'}), (104, {'label': 'rose'}), (103, {'label': 'sunflower'}), (102, {'label': 'tulip'}), (101, {'label': 'food containers'}), (100, {'label': 'bottle'}), (99, {'label': 'bowl'}), (98, {'label': 'can'}), (97, {'label': 'cup'}), (96, {'label': 'plate'}), (95, {'label': 'fruit and vegetables'}), (94, {'label': 'apple'}), (93, {'label': 'mushroom'}), (92, {'label': 'orange'}), (91, {'label': 'pear'}), (90, {'label': 'sweet_pepper'}), (89, {'label': 'household electrical devices'}), (88, {'label': 'clock'

In [None]:
# Load pre-trained embedding weights from a checkpoint and split them into
# hyperclass and fine-class prototypes using the index lists built above.
# NOTE(review): the path is absolute and environment-specific, and torch.load
# may unpickle arbitrary objects depending on the torch version - only load
# checkpoints from a trusted source.
model_loaded = torch.load('/content/DistortionEmbedding_weights_64.pth')
#tst = model_loaded["weight"].data.cpu()
# The checkpoint is indexed by the key "weight"; presumably a state dict - verify.
embeddings = model_loaded["weight"].data.cpu()
hyper_embedings = embeddings[l_hyper]
finer_embedings = embeddings[l_finer]

In [None]:
# Sanity check on the coarse prototype table: rows x embedding width
hyper_embedings.size()

torch.Size([20, 64])

In [None]:
import random
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
import networkx as nx
from geoopt.manifolds import PoincareBall
import hypll.nn as hnn
from hypll.optim import RiemannianSGD
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR100
from torchvision.transforms import ToTensor
from geoopt import ManifoldParameter

# Poincare ball manifold from geoopt, constructed with c=1.0
poincare_ball = PoincareBall(c=1.0)

# Run on CUDA when it is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# CIFAR-100 train split first, then the test split; images are converted to tensors
train_dataset, test_dataset = (
    CIFAR100(root='./data', train=is_train, download=True, transform=ToTensor())
    for is_train in (True, False)
)

# Mini-batches of 64 samples; only the training loader shuffles
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)



Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Fine-only experiment: for every (learning rate, tau) pair, train a CNN whose
# Poincare-ball embedding is scored against the fine-class prototypes, then report
# the fine accuracy and the coarse accuracy implied by the fine prediction.
# NOTE(review): the raw dataset labels index the rows of finer_embedings directly
# and `label // 5` is taken as the coarse class. Both presume that the prototype
# rows follow the dataset's label order - TODO confirm against how l_finer and
# l_hyper are built.
lr_ls = [0.00003]
tau_ls = [10]
for lr_var in lr_ls:
    for tau_var in tau_ls:

        class CNN(nn.Module):
            """CNN encoder that classifies by distance to fixed hyperbolic prototypes.

            The network maps an image batch to 64-dimensional vectors, sends them
            onto the Poincare ball with expmap0 and returns, for each prototype set,
            the negative distance to every prototype multiplied by tau so the values
            can be used as logits.
            """

            def __init__(self):
                super(CNN, self).__init__()
                self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
                self.pool = nn.MaxPool2d(2, 2)
                self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
                self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
                # Two 2x2 poolings: assumes 3x32x32 inputs so the flattened size is 64*8*8
                self.fc1 = nn.Linear(64 * 8 * 8, 64)
                self.fc2 = nn.Linear(64, 64)  # The final embedding is 64-dimensional

                self.ball = poincare_ball
                self.tau = tau_var  # multiplier applied to the negative distances

                # Prototypes are plain tensors (not parameters), so they are not trained
                self.hyperclass_prototypes = hyper_embedings.to(device)
                self.finer_prototypes = finer_embedings.to(device)

            def forward(self, x):
                """Return (fine_logits, hyper_logits) for the image batch x."""
                x = self.pool(nn.functional.relu(self.conv1(x)))
                x = self.pool(nn.functional.relu(self.conv2(x)))
                x = nn.functional.relu(self.conv3(x))
                x = x.view(x.size(0), -1)  # Flatten while preserving the batch dimension
                x_mid = nn.functional.relu(self.fc1(x))
                embeddings = self.fc2(x_mid)
                embeddings_hyp = self.ball.expmap0(embeddings)  # Map onto the Poincare ball

                # Broadcast (batch, 1, dim) against (1, classes, dim) to get one
                # distance per (sample, prototype) pair; negate so closer = larger logit.
                hyper_dists = -self.ball.dist(embeddings_hyp.unsqueeze(1), self.hyperclass_prototypes.unsqueeze(0)) * self.tau
                finer_dists = -self.ball.dist(embeddings_hyp.unsqueeze(1), self.finer_prototypes.unsqueeze(0)) * self.tau
                return finer_dists, hyper_dists

        # Move the model to the device before the optimizer is built on its parameters
        model = CNN().to(device)

        # Define the loss functions and optimizer (only the fine loss is used in this run)
        criterion_fine = nn.CrossEntropyLoss()
        criterion_hyper = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr_var)

        # Train the model
        num_epochs = 100
        print("training")
        for epoch in range(num_epochs):
            model.train()
            print(epoch)
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                finer_dists, hyper_dists = model(images)
                loss_fine = criterion_fine(finer_dists, labels)
                loss = loss_fine  # fine-only objective
                loss.backward()
                optimizer.step()

        # Get the model predictions
        print("eval")
        model.eval()
        predictions_fine = []
        predictions_hyper = []
        true_labels = []
        with torch.no_grad():
            for images, targets in test_loader:
                images = images.to(device)
                fine_output, hyper_dists = model(images)
                # Largest logit = smallest distance to a fine prototype
                pred_fine = fine_output.argmax(dim=1)
                # Coarse prediction is derived from the fine one, not from hyper_dists
                pred_hyper = pred_fine // 5
                predictions_fine.append(pred_fine.cpu())
                predictions_hyper.append(pred_hyper.cpu())
                true_labels.append(targets.cpu())

        predictions_fine = torch.cat(predictions_fine, dim=0)
        predictions_hyper = torch.cat(predictions_hyper, dim=0)
        true_labels = torch.cat(true_labels, dim=0)
        print("fine only")
        # Calculate accuracy for fine-grained classification
        accuracy_fine = (predictions_fine == true_labels).float().mean()
        print(f'Fine-grained Classification Accuracy: {accuracy_fine:.4f}')

        # Calculate accuracy for hyperclass classification
        accuracy_hyper = (predictions_hyper == (true_labels // 5)).float().mean()
        print(f'Hyperclass Classification Accuracy: {accuracy_hyper:.4f}')

        # `worksheet` is not created in this cell; append the result row only when
        # it exists so a missing sheet cannot fail the cell after training is done.
        results_sheet = globals().get("worksheet")
        if results_sheet is None:
            print("worksheet is not defined; skipping results logging")
        else:
            results_sheet.append_row([tau_var, lr_var, float(accuracy_hyper), float(accuracy_fine)])

training
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
eval
fine only
Fine-grained Classification Accuracy: 0.0595
Hyperclass Classification Accuracy: 0.3380


NameError: name 'worksheet' is not defined

In [None]:
# Hyper-only experiment: for every (learning rate, tau) pair, train the CNN with
# the coarse-class loss alone and report the coarse accuracy. The fine output is
# neither trained nor evaluated here and is reported as 0.
# NOTE(review): `label // 5` is taken as the coarse class, which presumes that the
# rows of hyper_embedings follow that grouping of the dataset's label ids -
# TODO confirm against how l_hyper is built.
lr_ls = [0.00003]
tau_ls = [10]
for lr_var in lr_ls:
    for tau_var in tau_ls:

        class CNN(nn.Module):
            """CNN encoder that classifies by distance to fixed hyperbolic prototypes.

            The network maps an image batch to 64-dimensional vectors, sends them
            onto the Poincare ball with expmap0 and returns, for each prototype set,
            the negative distance to every prototype multiplied by tau so the values
            can be used as logits.
            """

            def __init__(self):
                super(CNN, self).__init__()
                self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
                self.pool = nn.MaxPool2d(2, 2)
                self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
                self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
                # Two 2x2 poolings: assumes 3x32x32 inputs so the flattened size is 64*8*8
                self.fc1 = nn.Linear(64 * 8 * 8, 64)
                self.fc2 = nn.Linear(64, 64)  # The final embedding is 64-dimensional

                self.ball = poincare_ball
                self.tau = tau_var  # multiplier applied to the negative distances

                # Prototypes are plain tensors (not parameters), so they are not trained
                self.hyperclass_prototypes = hyper_embedings.to(device)
                self.finer_prototypes = finer_embedings.to(device)

            def forward(self, x):
                """Return (fine_logits, hyper_logits) for the image batch x."""
                x = self.pool(nn.functional.relu(self.conv1(x)))
                x = self.pool(nn.functional.relu(self.conv2(x)))
                x = nn.functional.relu(self.conv3(x))
                x = x.view(x.size(0), -1)  # Flatten while preserving the batch dimension
                x_mid = nn.functional.relu(self.fc1(x))
                embeddings = self.fc2(x_mid)
                embeddings_hyp = self.ball.expmap0(embeddings)  # Map onto the Poincare ball

                # Broadcast (batch, 1, dim) against (1, classes, dim) to get one
                # distance per (sample, prototype) pair; negate so closer = larger logit.
                hyper_dists = -self.ball.dist(embeddings_hyp.unsqueeze(1), self.hyperclass_prototypes.unsqueeze(0)) * self.tau
                finer_dists = -self.ball.dist(embeddings_hyp.unsqueeze(1), self.finer_prototypes.unsqueeze(0)) * self.tau
                return finer_dists, hyper_dists

        # Move the model to the device before the optimizer is built on its parameters
        model = CNN().to(device)

        # Define the loss functions and optimizer (only the hyper loss is used in this run)
        criterion_fine = nn.CrossEntropyLoss()
        criterion_hyper = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr_var)

        # Train the model
        num_epochs = 100
        print("training")
        for epoch in range(num_epochs):
            model.train()
            print(epoch)
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                hyper_labels = labels // 5  # Convert labels to hyperclass labels (0-19)
                optimizer.zero_grad()
                finer_dists, hyper_dists = model(images)
                # Negative scaled distances act as the logits of the coarse classifier
                loss_hyper = criterion_hyper(hyper_dists, hyper_labels)
                loss = loss_hyper  # hyper-only objective
                loss.backward()
                optimizer.step()

        # Get the model predictions
        print("eval")
        model.eval()
        predictions_hyper = []
        true_labels = []
        with torch.no_grad():
            for images, targets in test_loader:
                images = images.to(device)
                fine_output, hyper_dists = model(images)
                # Largest logit = smallest distance to a coarse prototype
                pred_hyper = hyper_dists.argmax(dim=1)
                predictions_hyper.append(pred_hyper.cpu())
                true_labels.append(targets.cpu())
        print("hyper only")
        predictions_hyper = torch.cat(predictions_hyper, dim=0)
        true_labels = torch.cat(true_labels, dim=0)

        # Fine accuracy is not measured in this run. Use an explicit placeholder so
        # the printed line and the logged row agree, instead of reading whatever
        # accuracy_fine an earlier cell happened to leave behind.
        accuracy_fine = 0.0
        print(f'Fine-grained Classification Accuracy: {accuracy_fine:.4f}')

        # Calculate accuracy for hyperclass classification
        accuracy_hyper = (predictions_hyper == (true_labels // 5)).float().mean()
        print(f'Hyperclass Classification Accuracy: {accuracy_hyper:.4f}')

        # `worksheet` is not created in this cell; append the result row only when
        # it exists so a missing sheet cannot fail the cell after training is done.
        results_sheet = globals().get("worksheet")
        if results_sheet is None:
            print("worksheet is not defined; skipping results logging")
        else:
            results_sheet.append_row([tau_var, lr_var, float(accuracy_hyper), float(accuracy_fine)])

training
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
eval
hyper only
Fine-grained Classification Accuracy: 0.0000
Hyperclass Classification Accuracy: 0.3406


In [None]:
# Two-output experiment: for every (learning rate, tau) pair, train the CNN with
# the sum of the fine-class and coarse-class losses, then report both accuracies,
# each taken from its own output.
# NOTE(review): the raw dataset labels index the rows of finer_embedings directly
# and `label // 5` is taken as the coarse class. Both presume that the prototype
# rows follow the dataset's label order - TODO confirm against how l_finer and
# l_hyper are built.
lr_ls = [0.00003]
tau_ls = [10]
for lr_var in lr_ls:
    for tau_var in tau_ls:

        class CNN(nn.Module):
            """CNN encoder that classifies by distance to fixed hyperbolic prototypes.

            The network maps an image batch to 64-dimensional vectors, sends them
            onto the Poincare ball with expmap0 and returns, for each prototype set,
            the negative distance to every prototype multiplied by tau so the values
            can be used as logits.
            """

            def __init__(self):
                super(CNN, self).__init__()
                self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
                self.pool = nn.MaxPool2d(2, 2)
                self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
                self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
                # Two 2x2 poolings: assumes 3x32x32 inputs so the flattened size is 64*8*8
                self.fc1 = nn.Linear(64 * 8 * 8, 64)
                self.fc2 = nn.Linear(64, 64)  # The final embedding is 64-dimensional

                self.ball = poincare_ball
                self.tau = tau_var  # multiplier applied to the negative distances

                # Prototypes are plain tensors (not parameters), so they are not trained
                self.hyperclass_prototypes = hyper_embedings.to(device)
                self.finer_prototypes = finer_embedings.to(device)

            def forward(self, x):
                """Return (fine_logits, hyper_logits) for the image batch x."""
                x = self.pool(nn.functional.relu(self.conv1(x)))
                x = self.pool(nn.functional.relu(self.conv2(x)))
                x = nn.functional.relu(self.conv3(x))
                x = x.view(x.size(0), -1)  # Flatten while preserving the batch dimension
                x_mid = nn.functional.relu(self.fc1(x))
                embeddings = self.fc2(x_mid)
                embeddings_hyp = self.ball.expmap0(embeddings)  # Map onto the Poincare ball

                # Broadcast (batch, 1, dim) against (1, classes, dim) to get one
                # distance per (sample, prototype) pair; negate so closer = larger logit.
                hyper_dists = -self.ball.dist(embeddings_hyp.unsqueeze(1), self.hyperclass_prototypes.unsqueeze(0)) * self.tau
                finer_dists = -self.ball.dist(embeddings_hyp.unsqueeze(1), self.finer_prototypes.unsqueeze(0)) * self.tau
                return finer_dists, hyper_dists

        # Move the model to the device before the optimizer is built on its parameters
        model = CNN().to(device)

        # Define the loss functions and optimizer
        criterion_fine = nn.CrossEntropyLoss()
        criterion_hyper = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr_var)

        # Train the model
        num_epochs = 100
        print("training")
        for epoch in range(num_epochs):
            model.train()
            print(epoch)
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                hyper_labels = labels // 5  # Convert labels to hyperclass labels (0-19)
                optimizer.zero_grad()
                finer_dists, hyper_dists = model(images)
                # Negative scaled distances act as the logits of both classifiers
                loss_fine = criterion_fine(finer_dists, labels)
                loss_hyper = criterion_hyper(hyper_dists, hyper_labels)
                loss = loss_fine + loss_hyper  # Combined loss
                loss.backward()
                optimizer.step()

        # Get the model predictions
        print("eval")
        model.eval()
        predictions_fine = []
        predictions_hyper = []
        true_labels = []
        with torch.no_grad():
            for images, targets in test_loader:
                images = images.to(device)
                fine_output, hyper_dists = model(images)
                # Largest logit = smallest distance to a prototype, for each output
                pred_fine = fine_output.argmax(dim=1)
                pred_hyper = hyper_dists.argmax(dim=1)
                predictions_fine.append(pred_fine.cpu())
                predictions_hyper.append(pred_hyper.cpu())
                true_labels.append(targets.cpu())

        predictions_fine = torch.cat(predictions_fine, dim=0)
        predictions_hyper = torch.cat(predictions_hyper, dim=0)
        true_labels = torch.cat(true_labels, dim=0)
        print("two output")
        # Calculate accuracy for fine-grained classification
        accuracy_fine = (predictions_fine == true_labels).float().mean()
        print(f'Fine-grained Classification Accuracy: {accuracy_fine:.4f}')

        # Calculate accuracy for hyperclass classification
        accuracy_hyper = (predictions_hyper == (true_labels // 5)).float().mean()
        print(f'Hyperclass Classification Accuracy: {accuracy_hyper:.4f}')

        # `worksheet` is not created in this cell; append the result row only when
        # it exists so a missing sheet cannot fail the cell after training is done.
        results_sheet = globals().get("worksheet")
        if results_sheet is None:
            print("worksheet is not defined; skipping results logging")
        else:
            results_sheet.append_row([tau_var, lr_var, float(accuracy_hyper), float(accuracy_fine)])

training
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
eval
fine only
Fine-grained Classification Accuracy: 0.0964
Hyperclass Classification Accuracy: 0.3357
