In [1]:
import torch
import torch.nn as nn
import os
import numpy as np

In [2]:
import trimesh
import tensorflow as tf

DATA_DIR = tf.keras.utils.get_file(
    "modelnet.zip",
    "http://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip",
    extract=True,
)
DATA_DIR = os.path.join(os.path.dirname(DATA_DIR), "ModelNet10")

2023-09-02 02:24:39.765696: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Downloading data from http://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip


In [3]:
import glob
from torch.utils.data import DataLoader, TensorDataset, RandomSampler

num_classes = 10
batch_size = 8

# bathtub 0, bed 1, chair 2, desk 3, dresser 4, monitor 5, night_stand 6, sofa 7, table 8, toilet 9
item_names = ["bathtub", "bed", "chair", "desk", "dresser", "monitor", "night_stand", "sofa", "table", "toilet"]
train_labels = np.array([])
train_inputs = []
test_labels = np.array([])
test_inputs = []


for i in range(len(item_names)):
# for i in range(2):
    item_name = item_names[i]
    
    train_files = glob.glob(os.path.join(DATA_DIR, item_name, "train/*.off"))
    train_labels = np.concatenate([train_labels, np.ones(len(train_files)) * i])
    for file in train_files:
        train_inputs.append(trimesh.load(file).sample(4096))
        
    test_files = glob.glob(os.path.join(DATA_DIR, item_name, "test/*.off"))
    test_labels = np.concatenate([test_labels, np.ones(len(test_files)) * i])
    for file in test_files:
        test_inputs.append(trimesh.load(file).sample(4096))


train_inputs = torch.tensor(train_inputs)
train_labels = torch.tensor(train_labels).flatten()

test_inputs = torch.tensor(test_inputs)
test_labels = torch.tensor(test_labels).flatten()

# normalization
train_inputs = train_inputs - train_inputs.mean(dim=1).unsqueeze(1)
train_inputs = train_inputs / torch.max(torch.sqrt(torch.sum(train_inputs ** 2, dim=2)), dim=1).values.view(-1, 1, 1)
test_inputs = test_inputs - test_inputs.mean(dim=1).unsqueeze(1)
test_inputs = test_inputs / torch.max(torch.sqrt(torch.sum(test_inputs ** 2, dim=2)), dim=1).values.view(-1, 1, 1)


train_dataset = TensorDataset(train_inputs, train_labels)
random_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=random_sampler)

test_dataset = TensorDataset(test_inputs, test_labels)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)


  train_inputs = torch.tensor(train_inputs)


In [4]:
torch.save({
    'train_inputs': train_inputs,
    'train_labels': train_labels,
    'test_inputs': test_inputs,
    'test_labels': test_labels
}, 'cls_dataset.pt')

In [5]:
import datetime
import time
import torch.nn.functional as F
from transformers import AdamW
from sklearn.metrics import f1_score
from transformers import AdamW, get_linear_schedule_with_warmup
import sys

EPOCHS = 10
LEARNING_RATE = 0.001
EPS = 1e-8
WARMUP = 100

def update_progress(progress):
    sys.stdout.write('\r%d%%' % progress)
    # sys.stdout.write(f'{progress}%  {msg}')
    sys.stdout.flush()
    
def format_time(time):
    time_rounded = int(round((time)))
    return str(datetime.timedelta(seconds=time_rounded))


def train_model(model, epochs, datalodaer):
    optimizer = AdamW(model.parameters())
    num_training_steps = len(datalodaer) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=WARMUP, num_training_steps=num_training_steps)
    
    model.to('cuda')
    model.train()

    batch_size = datalodaer.batch_size
    num_data = len(datalodaer) * batch_size
    start_time = time.time()

    print(" --- training model")
    for epoch in range(epochs):
        total_loss = 0
        epoch_start_time = time.time()
        
        for step, batch in enumerate(datalodaer):
            batch_inputs = tuple(t.to('cuda') for t in batch)
            inputs = batch_inputs[0].type(torch.float32)
            labels = batch_inputs[1]

            output = model(inputs)
            loss = F.cross_entropy(output, labels.long())
            total_loss += loss.item()
            
            optimizer.zero_grad(set_to_none=True)
            loss.backward()
            optimizer.step()
            scheduler.step()

            update_progress((step+1)*batch_size / num_data * 100)

        avg_train_loss = total_loss / len(datalodaer)
        
        
        print(f' {epoch+1}/{epochs} - elapsed: {format_time(time.time() - epoch_start_time)}, average train loss: {avg_train_loss}')

    print(f' --- train finished, elapsed: {format_time(time.time() - start_time)}')

In [6]:

from model import PointTransformer

class PointTransformerCls(nn.Module):
    def __init__(self, n_classes) -> None:
        super().__init__()
        self.transformer = PointTransformer(3)
        self.cls = nn.Sequential(
            nn.Linear(512, 256),
            # nn.BatchNorm1d(256),
            nn.GELU(),
            # nn.Dropout(),
            nn.Linear(256, 128),
            # nn.BatchNorm1d(128),
            nn.GELU(),
            # nn.Dropout(),
            nn.Linear(128, n_classes)
        )
    
    def forward(self, x):
        x = self.transformer(x) # b, n, 512
        x = torch.mean(x, dim=1)

        return self.cls(x)

In [7]:
model = PointTransformerCls(num_classes)
model.cuda()
model.train()
train_model(model, 10, train_dataloader)
torch.save(model, "./cls_trained.pt")
# model = torch.load("cls_trained.pt")
# model.cuda()



 --- training model
100% 1/10 - elapsed: 0:05:21, average train loss: 0.8805732654859284
100% 2/10 - elapsed: 0:05:22, average train loss: 0.4353672843267824
100% 3/10 - elapsed: 0:05:23, average train loss: 0.3224945923958332
100% 4/10 - elapsed: 0:05:54, average train loss: 0.24662769633370502
100% 5/10 - elapsed: 0:06:14, average train loss: 0.2052340107808498
100% 6/10 - elapsed: 0:06:14, average train loss: 0.17295985943354333
100% 7/10 - elapsed: 0:06:13, average train loss: 0.1463711571493782
100% 8/10 - elapsed: 0:06:14, average train loss: 0.11660846313025978
100% 9/10 - elapsed: 0:06:11, average train loss: 0.09290352984688084
100% 10/10 - elapsed: 0:05:58, average train loss: 0.08250303392468593
 --- train finished, elapsed: 0:59:04


In [8]:
def test_model(model, dataloader, draw = False):
    test_loss = 0
    labels = np.array([])
    predictions = np.array([])

    model.to('cuda')
    model.eval()

    batch_size = dataloader.batch_size
    num_data = len(dataloader) * batch_size
    start_time = time.time()

    with torch.no_grad():
        for step, batch in enumerate(dataloader):
            batch_inputs = tuple(t.to('cuda') for t in batch)
            inputs = batch_inputs[0].type(torch.float32)
            label = batch_inputs[1]

            output = model(inputs)
            loss = F.cross_entropy(output, label.long())
            test_loss += loss.item()
            
            softmaxed_output = F.softmax(output, dim=1)
            prediction = softmaxed_output.argmax(dim=1).detach().cpu().numpy()
            predictions = np.concatenate([predictions, prediction])
            labels = np.concatenate([labels, label.cpu().long().numpy()])
            

            update_progress((step * batch_size) / num_data * 100)
            
    n_rights = predictions[predictions == labels].shape[0]
    

    test_loss /= num_data
    print(f'\nloss: {test_loss}, {n_rights}/{num_data}, f1_score : {f1_score(labels, predictions, average="micro")}')
    print(f' --- evaluation finished {format_time(time.time() - start_time)}')


In [9]:
model.eval()
test_model(model, test_dataloader)

99%
loss: 0.025643586738504018, 839/912, f1_score : 0.9240088105726872
 --- evaluation finished 0:01:05
