In [1]:
from models.waffleiron.segmenter import Segmenter
import torch
from datasets import LIST_DATASETS, Collate
from tqdm import tqdm

# Build the segmentation network. The architecture values must agree with the
# checkpoint loaded below, otherwise load_state_dict() will reject the weights.
model = Segmenter(
    input_channels=5,
    feat_channels=768,
    depth=48,
    grid_shape=[[256, 256], [256, 32], [256, 32]],
    nb_class=16,  # number of predicted classes
    layer_norm=True,
)

# Read the pretrained checkpoint; the network weights live under its "net" entry.
ckpt = torch.load('./saved_models/ckpt_last_scalr.pth', map_location="cuda:0")
ckpt = ckpt["net"]

# Rename the checkpoint entries so that they match this model's parameter names:
#   "module.classif.0..."  -> dropped
#   "module.classif.1..."  -> "classif..."
#   other "module..." keys -> the leading len("module.") characters are removed
#   anything else          -> copied unchanged
new_ckpt = {}
for name, weights in ckpt.items():
    if not name.startswith("module"):
        new_ckpt[name] = weights
    elif name.startswith("module.classif.0"):
        continue
    elif name.startswith("module.classif.1"):
        new_ckpt["classif" + name[len("module.classif.1"):]] = weights
    else:
        new_ckpt[name[len("module."):]] = weights

model.load_state_dict(new_ckpt)

# Run on the first GPU, in inference mode.
torch.cuda.set_device("cuda:0")
model = model.cuda("cuda:0")
model.eval()

# Name of the dataset to look up in LIST_DATASETS (also used in the error message).
DATASET_NAME = "nuscenes"

# Dataset options. "grids_shape" repeats the grid_shape given to the Segmenter.
kwargs = {
    "rootdir": '/root/main/dataset/nuscenes',
    "input_feat": ["xyz", "intensity", "radius"],
    "voxel_size": 0.1,
    "num_neighbors": 16,
    "dim_proj": [2, 1, 0],
    "grids_shape": [[256, 256], [256, 32], [256, 32]],
    "fov_xyz": [[-64, -64, -8], [64, 64, 8]],  # Check here
}

# Get dataset class
DATASET = LIST_DATASETS.get(DATASET_NAME)
if DATASET is None:
    # The previous message formatted `args.dataset`, but no `args` object exists
    # in this notebook, so this branch raised NameError instead of ValueError.
    raise ValueError(f"Dataset {DATASET_NAME} not available.")

# NOTE: despite the `train_` prefix, this is the "val" split. The names are kept
# because the other cells of the notebook refer to `train_loader`.
train_dataset = DATASET(
    phase="val",
    **kwargs,
)

# One sample per batch, default (non-shuffled) order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=1,
    pin_memory=True,
    drop_last=True,
    collate_fn=Collate(),
)

Using torch.scatter_reduce for 3D to 2D projection.
Using torch.scatter_reduce for 3D to 2D projection.


  ckpt = torch.load('./saved_models/ckpt_last_scalr.pth', map_location="cuda:0")


Loading NuScenes tables for version v1.0-mini...
Loading nuScenes-lidarseg...
32 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
404 lidarseg,
Done loading in 0.647 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.


In [2]:
import math
import matplotlib.pyplot as plt

# For several values of `stop` (passed to the model as an extra argument), compute
# the mean pairwise L2 distance between the per-point tokens of every pair of
# classes and save it as a heatmap.

SAMPLE_INDEX = 15   # 0-based position of the analysed sample in train_loader
NUM_CLASSES = 16
SKIPPED_CLASS = 8   # class left out of the distance matrix

# The analysed sample does not depend on `stop`, so fetch it once instead of
# re-iterating the loader for every layer.
for it, batch in enumerate(train_loader):
    if it == SAMPLE_INDEX:
        break
else:
    raise IndexError(f"train_loader has no sample at index {SAMPLE_INDEX}.")

# Network inputs
feat = batch["feat"].cuda(0, non_blocking=True)
labels = batch["labels_orig"].cuda(0, non_blocking=True)
batch["upsample"] = [
    up.cuda(0, non_blocking=True) for up in batch["upsample"]
]
cell_ind = batch["cell_ind"].cuda(0, non_blocking=True)
occupied_cell = batch["occupied_cells"].cuda(0, non_blocking=True)
neighbors_emb = batch["neighbors_emb"].cuda(0, non_blocking=True)
net_inputs = (feat, cell_ind, occupied_cell, neighbors_emb)

# Row/column k of the matrix corresponds to kept_classes[k] (classes above the
# skipped one are shifted down by one).
kept_classes = [c for c in range(NUM_CLASSES) if c != SKIPPED_CLASS]

for stop in range(0, 49, 3):
    with torch.no_grad():
        out = model(*net_inputs, stop)

    # NOTE(review): the output saved with this cell ends in
    # "IndexError: tuple index out of range"; confirm that model(..., stop)
    # really returns three elements before relying on this unpacking.
    out, embed, tokens = out[0], out[1], out[2]

    # Voxels to points: pick, for every point, the token of its voxel.
    token_upsample = []
    for id_b, closest_point in enumerate(batch["upsample"]):
        token_upsample.append(tokens[id_b, :, closest_point].T)
    token_2 = torch.cat(token_upsample, dim=0)

    # Tokens grouped by class. Boolean masking always yields a 2-D tensor, even
    # for a class with a single point; the former nonzero(...).squeeze() turned
    # that case into a 0-d index and made torch.cdist fail on a 1-D input.
    # Assumes `labels` is 1-D with one entry per row of token_2 - TODO confirm.
    class_tokens = [token_2[labels == c] for c in kept_classes]

    mean_dist = torch.zeros((len(kept_classes), len(kept_classes)))
    for row, tokens_i in enumerate(class_tokens):
        for col, tokens_j in enumerate(class_tokens):
            # Classes absent from this sample keep a distance of 0.
            if tokens_i.shape[0] == 0 or tokens_j.shape[0] == 0:
                continue
            mean = torch.cdist(tokens_i, tokens_j, p=2).mean().item()
            if not math.isnan(mean):
                mean_dist[row, col] = mean

    # Visualize the matrix as a heatmap
    plt.figure(figsize=(5, 5))
    plt.imshow(mean_dist.cpu(), cmap='hot', interpolation='nearest')
    plt.colorbar()  # Add color bar to interpret the values
    plt.title(f'Distance between classes layer {stop}')
    plt.savefig(f'./results/nuscenes_features_2/distance_cmap_{stop}.png')  # Save the heatmap as an image
    plt.show()  # Display the heatmap
    plt.close()  # Release the figure; one is created per loop iteration

3


IndexError: tuple index out of range

In [5]:
# Number of batches the loader yields per pass (the loader uses batch_size=1).
len(train_loader)

404

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision.datasets import MNIST

# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
import torchmetrics
from tqdm import tqdm

import torchhd
from torchhd.models import Centroid
from torchhd import embeddings


# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using {} device".format(device))

# Settings of the hyperdimensional encoder / classifier.
DIMENSIONS = 10000  # hypervector size given to Encoder and Centroid
FEAT_SIZE = 768     # number of entries of one input feature vector
NUM_LEVELS = 1000   # number of levels of the Level embedding
BATCH_SIZE = 1  # for GPUs with enough memory we can process multiple images at once


class Encoder(nn.Module):
    """Turn a feature vector into one quantized hypervector.

    Args:
        out_features: size of the produced hypervector.
        size: number of position hypervectors (first argument of
            ``embeddings.Random``); forward() binds them element-wise with the
            embedded input values.
        levels: number of levels of the ``embeddings.Level`` value embedding.

    NOTE(review): torchhd's ``Level`` embedding presumably maps inputs from a
    fixed [low, high] interval (defaults not set here) - confirm that the
    features fed to this encoder fall inside that interval.
    """

    def __init__(self, out_features, size, levels):
        super().__init__()
        # Not used by forward(); kept so the module's attributes stay the same.
        self.flatten = nn.Flatten()
        # Creation order is preserved: both embeddings are built in this order.
        self.position = embeddings.Random(size, out_features)
        self.value = embeddings.Level(levels, out_features)

    def forward(self, x):
        """Bind position and value hypervectors, bundle them, then quantize."""
        bound = torchhd.bind(self.position.weight, self.value(x))
        return torchhd.hard_quantize(torchhd.multiset(bound))


# Hypervector encoder for FEAT_SIZE-long feature vectors, placed on `device`.
encode = Encoder(DIMENSIONS, FEAT_SIZE, NUM_LEVELS).to(device)

# Centroid classifier working on DIMENSIONS-sized hypervectors, on `device`.
num_classes = 16
model_hd = Centroid(DIMENSIONS, num_classes).to(device)

Using cuda device


In [None]:
# Train the centroid classifier `model_hd` on the voxel tokens of the first
# sample: every voxel gets the most frequent label of the points mapped to it,
# and its token is encoded into a hypervector and added to that class.

IGNORE_LABEL = 255  # voxels carrying this label are not used for training
stop = 0

# Only the first batch of the loader is used.
batch = next(iter(train_loader))

# Network inputs
feat = batch["feat"].cuda(0, non_blocking=True)
labels = batch["labels_orig"].cuda(0, non_blocking=True)
batch["upsample"] = [
    up.cuda(0, non_blocking=True) for up in batch["upsample"]
]
cell_ind = batch["cell_ind"].cuda(0, non_blocking=True)
occupied_cell = batch["occupied_cells"].cuda(0, non_blocking=True)
neighbors_emb = batch["neighbors_emb"].cuda(0, non_blocking=True)

with torch.no_grad():
    out = model(feat, cell_ind, occupied_cell, neighbors_emb, stop)
    # First batch element of each output, transposed so that dim 0 indexes voxels.
    embed, tokens = out[0][0], out[1][0]
    embed = embed.transpose(0, 1)
    tokens = tokens.transpose(0, 1)

    print("Tokens", tokens.shape)

    # Majority label per voxel, computed as a (voxel, label) vote table instead
    # of a per-point Python loop over CUDA scalars.
    point_to_voxel = batch["upsample"][0].long()
    point_labels = labels.long()
    num_bins = max(int(point_labels.max().item()), IGNORE_LABEL) + 1
    votes = torch.zeros(
        (embed.shape[0], num_bins), dtype=torch.long, device=point_labels.device
    )
    votes.index_put_(
        (point_to_voxel, point_labels),
        torch.ones_like(point_labels),
        accumulate=True,
    )
    # argmax returns the first maximum, i.e. the smallest label on ties, like
    # bincount(...).argmax() did.
    labels_v_single = votes.argmax(dim=1)
    # A voxel without any point has no label; mark it as ignored (the former
    # torch.bincount call raised on such an empty list).
    labels_v_single[votes.sum(dim=1) == 0] = IGNORE_LABEL

    # HD training. The loop variables are deliberately not called `labels`, so
    # the per-point label tensor above stays available to later cells.
    for voxel_token, voxel_label in tqdm(
        zip(tokens, labels_v_single),
        total=min(len(tokens), len(labels_v_single)),
        desc="Training",
    ):
        if voxel_label != IGNORE_LABEL:
            voxel_token = voxel_token.to(device)
            voxel_label = voxel_label.to(device)
            # Batch of one: a (1, DIMENSIONS) hypervector with a 1-element target.
            samples_hv = encode(voxel_token).reshape((1, DIMENSIONS))
            model_hd.add(samples_hv, voxel_label.reshape(1))

In [54]:
# Shape of `out` as currently bound in the kernel; several cells assign `out`,
# so the result depends on which of them ran last.
out.shape

torch.Size([1, 16, 17483])

In [50]:
# Number of points per label value. `labels` is already a tensor, so it is passed
# to bincount directly: re-wrapping it in torch.tensor() only made a copy and is
# the call echoed by the warning recorded under this cell.
print(torch.bincount(labels))
# Number of per-voxel labels produced by the training cell.
print(len(labels_v_single))

tensor([  312,     1,     4,    81,     4,     0,   113,    16,     0,   494,
        12172,   629,   817,  2602,  4873,  4034,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0, 

  print(torch.bincount(torch.tensor(labels)))


In [None]:
# Train and evaluate the centroid classifier on (samples, targets) loaders.
#
# NOTE(review): `train_ld` and `test_ld` are not defined in the visible part of
# this notebook; they must be created before this cell can run.
#
# The classifier is `model_hd` (the Centroid). `model` is the Segmenter in this
# notebook, so the former model.add / model.normalize / model(..., dot=True)
# calls targeted the wrong object. The loop variable is named `targets` so that
# it does not overwrite the notebook-level `labels` tensor.
with torch.no_grad():
    for samples, targets in tqdm(train_ld, desc="Training"):
        samples = samples.to(device)
        targets = targets.to(device)

        samples_hv = encode(samples)
        model_hd.add(samples_hv, targets)

accuracy = torchmetrics.Accuracy("multiclass", num_classes=num_classes)

with torch.no_grad():
    model_hd.normalize()

    for samples, targets in tqdm(test_ld, desc="Testing"):
        samples = samples.to(device)

        samples_hv = encode(samples)
        outputs = model_hd(samples_hv, dot=True)
        accuracy.update(outputs.cpu(), targets)

print(f"Testing accuracy of {(accuracy.compute().item() * 100):.3f}%")