# Imports and Setup

## Imports

In [None]:
import os

!git clone https://github.com/zcanfes/3d-lmnet-pytorch.git
os.chdir('/content/3d-lmnet-pytorch')

print('Installing requirements')
!pip install -r requirements.txt

# Make sure you restart runtime when directed by Colab

In [None]:
import sys
import torch
sys.path.insert(1, "/content/3d-lmnet-pytorch/3d-lmnet-pytorch")
print('CUDA availability:', torch.cuda.is_available())
%cd /content/3d-lmnet-pytorch/3d-lmnet-pytorch

In [3]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import numpy as np
import matplotlib as plt
import torch

In [None]:
torch.cuda.is_available()
need_pytorch3d=False
try:
    import pytorch3d
except ModuleNotFoundError:
    need_pytorch3d=True
if need_pytorch3d:
    if sys.platform.startswith("linux"):
        # We try to install PyTorch3D via a released wheel.
        pyt_version_str=torch.__version__.split("+")[0].replace(".", "")
        version_str="".join([
            f"py3{sys.version_info.minor}_cu",
            torch.version.cuda.replace(".",""),
            f"_pyt{pyt_version_str}"
        ])
        !pip install pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html
    else:
        # We try to install PyTorch3D from source.
        !curl -LO https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
        !tar xzf 1.10.0.tar.gz
        os.environ["CUB_HOME"] = os.getcwd() + "/cub-1.10.0"
        !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'

## Prepare Datasets
**Important Note:** Before running these cells, make sure that you have downloaded the datasets and moved them into the `./data` folder.

### ShapeNet Terms and Conditions

To use the data, we agree to the terms and conditions below:

1. Researcher shall use the Database only for non-commercial research and educational purposes.
2. Princeton University and Stanford University make no representations or warranties regarding the Database, including but not limited to warranties of non-infringement or fitness for a particular purpose.
3. Researcher accepts full responsibility for his or her use of the Database and shall defend and indemnify Princeton University and Stanford University, including their employees, Trustees, officers and agents, against any and all claims arising from Researcher's use of the Database, including but not limited to Researcher's use of any copies of copyrighted 3D models that he or she may create from the Database.
4. Researcher may provide research associates and colleagues with access to the Database provided that they first agree to be bound by these terms and conditions.
5. Princeton University and Stanford University reserve the right to terminate Researcher's access to the Database at any time.
6. If Researcher is employed by a for-profit, commercial entity, Researcher's employer shall also be bound by these terms and conditions, and Researcher hereby represents that he or she is fully authorized to enter into this agreement on behalf of such employer.
7. The law of the State of New Jersey shall apply to all disputes under this agreement.

### Unzip ShapeNet pointcloud zip

In [9]:
# Extract the ShapeNet point cloud archive into ./data (-q: quiet, -d: target directory).
!unzip -q ./data/ShapeNet_pointclouds.zip -d ./data

### Download 2D images

In [None]:
!wget http://cvgl.stanford.edu/data2/ShapeNetRendering.tgz -P ./data

In [11]:
# Unpack the rendering archive into ./data.
!tar -xf ./data/ShapeNetRendering.tgz -C ./data
# Optional: delete the archive afterwards to free disk space.
# !rm ./data/ShapeNetRendering.tgz

### Construct ShapeNet dataset

In [12]:
from data.shapenet import ShapeNet


def build_splits(title, **dataset_kwargs):
    """Construct the train/valid/test ShapeNet splits and print their sizes.

    Args:
        title: Heading printed before the three split lengths.
        **dataset_kwargs: Keyword arguments forwarded unchanged to every
            ``ShapeNet`` constructor call (e.g. ``image_model``, ``cat``).

    Returns:
        Tuple ``(train, val, test)`` holding the three constructed datasets.
    """
    train = ShapeNet('train', **dataset_kwargs)
    val = ShapeNet('valid', **dataset_kwargs)
    test = ShapeNet('test', **dataset_kwargs)

    print(title)
    print(f'Length of train set: {len(train)}')
    print(f'Length of val set: {len(val)}')
    print(f'Length of test set: {len(test)}')
    return train, val, test


# Datasets for the 2D image model, variant 1
train_dataset, val_dataset, test_dataset = build_splits(
    "Dataset is prepared for 2D model and variant 1:",
    image_model=True,
)

# Datasets for the 2D image model, variant 2 (cat=1)
train_dataset, val_dataset, test_dataset = build_splits(
    "\nDataset is prepared for 2D model and variant 2 (only chair class):",
    image_model=True,
    cat=1,
)

# Datasets for the 3D autoencoder; these are the ones left bound to
# train_dataset / val_dataset / test_dataset for the following cells.
train_dataset, val_dataset, test_dataset = build_splits(
    "\nDataset is prepared for 3D autoencoder:",
)

Dataset is prepared for 2D model and variant 1:
Length of train set: 630504
Length of val set: 210192
Length of test set: 210120

Dataset is prepared for 2D model and variant 2 (only chair class):
Length of train set: 97608
Length of val set: 32544
Length of test set: 32520

Dataset is prepared for 3D autoencoder:
Length of train set: 26271
Length of val set: 8758
Length of test set: 8755


In [13]:
# Fetch one training example and report the shape of each entry.
train_sample = train_dataset[1]
for label, key in (("Input images", "img"), ("Input point cloud", "point")):
    print(f'{label}: {train_sample[key].shape}')

Input images: (3, 128, 128)
Input point cloud: (2048, 3)


### Print output shape of the 2D Encoder model (both Variant I and Variant II versions)

In [14]:
from model.model_2d import ImageEncoder
from torchsummary import summary

# Variational encoder (bottleneck 512), moved to the GPU.
model2d_variational = ImageEncoder("variational", 512)
model2d_variational.cuda()

# Random stand-in for one 3x128x128 image, created on the CPU and then moved to the GPU.
input_tensor = torch.randn(1, 3, 128, 128).cuda()
print("input size: ", input_tensor.size())

# The variational encoder yields two tensors, reported here as mu and std.
mu, std = model2d_variational(input_tensor)
print("Mu size: ", mu.size(), ", Std size: ", std.size())
summary(model2d_variational, (3, 128, 128))

# The "normal" encoder yields a single latent tensor.
model2d_normal = ImageEncoder("normal", 512)
model2d_normal.cuda()
latent = model2d_normal(input_tensor)
print("Latent shape: ", latent.size())

input size:  torch.Size([1, 3, 128, 128])
Mu size:  torch.Size([1, 512]) , Std size:  torch.Size([1, 512])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 128, 128]             896
              ReLU-2         [-1, 32, 128, 128]               0
            Conv2d-3         [-1, 32, 128, 128]           9,248
              ReLU-4         [-1, 32, 128, 128]               0
            Conv2d-5           [-1, 64, 63, 63]          18,496
              ReLU-6           [-1, 64, 63, 63]               0
            Conv2d-7           [-1, 64, 63, 63]          36,928
              ReLU-8           [-1, 64, 63, 63]               0
            Conv2d-9           [-1, 64, 63, 63]          36,928
             ReLU-10           [-1, 64, 63, 63]               0
           Conv2d-11          [-1, 128, 31, 31]          73,856
             ReLU-12          [-1, 128, 31, 31]             

# 3D Point Cloud Autoencoder Training
Train the 3D point cloud autoencoder to learn to reconstruct a given point cloud

In [None]:
# Define the folder you want to save your trained model to
AUTOENCODER_EXP = "3d_autoencoder"
!mkdir "{AUTOENCODER_EXP}"

In [None]:
from training import train_ae

# Settings handed to the autoencoder training entry point.
config = dict(
    # Output location and hardware
    log_dir=f"./{AUTOENCODER_EXP}",
    device='cuda:0',
    # Optimisation
    lr=1e-4,
    weight_decay=1e-6,
    batch_size=32,
    resume_ckpt=None,
    learning_rate_model=5e-5,
    max_epochs=500,
    num_workers=4,
    # Network sizes (output_size = 2048 points x 3 coordinates)
    bottleneck=512,
    hidden_size=256,
    output_size=2048 * 3,
    # Reporting cadence
    print_every_n=5,
    visualize_every_n=5,
)

train_ae.main(config)

# 2D Image Encoder Training
Train 2D Image Encoder model to match the predicted latent space to the output of 3D Encoder of pointclouds.

**Important Note:** The training cells below are set up to use the newly trained autoencoder. If you want to use our pre-trained autoencoder model instead, change the `"3d_autoencoder_path"` field in the config by running the following cell.

In [20]:
# Point "3d_autoencoder_path" in the configs below at the pre-trained autoencoder folder.
AUTOENCODER_EXP = "trained_models"

## Variant I - Latent Matching with L1 Loss

In [23]:
ENCODER_EXP = "2d_encoder"
!mkdir "{ENCODER_EXP}"

In [24]:
!mkdir "{ENCODER_EXP}/L1"

In [None]:
from training import train_2d_to_3d

# Variant I: "normal" encoder head trained with the L1 latent-matching criterion.
config = {
    # Output location and hardware
    "experiment_name": f"./{ENCODER_EXP}/L1",
    "device": "cuda:0",  # run this on a gpu for a reasonable training time
    # Encoder / data settings
    "bottleneck": 512,
    "cat": 13,
    "batch_size": 32,
    "loss_criterion": "L1",
    "final_layer": "normal",
    # Autoencoder checkpoint used for latent matching
    "3d_autoencoder_path": f"./{AUTOENCODER_EXP}/model_autoencoder_final.pth",
    # Optimisation schedule
    "resume_ckpt": None,
    "learning_rate_model": 5e-5,
    "max_epochs": 30,
    "save_every_n": 1,
    "validate_every_n": 3,
    # Autoencoder sizes (output = 2048 points x 3 coordinates)
    "autoencoder_bottleneck": 512,
    "autoencoder_hidden_size": 256,
    "autoencoder_output_size": 2048 * 3,
}

train_2d_to_3d.main(config)

## Variant I - Latent Matching with L2 Loss

In [30]:
!mkdir "{ENCODER_EXP}/L2"

In [None]:
from training import train_2d_to_3d

# Variant I: "normal" encoder head trained with the L2 latent-matching criterion.
config = {
    # Output location and hardware
    "experiment_name": f"./{ENCODER_EXP}/L2",
    "device": "cuda:0",  # run this on a gpu for a reasonable training time
    # Encoder / data settings
    "bottleneck": 512,
    "cat": 13,
    "batch_size": 32,
    "loss_criterion": "L2",
    "final_layer": "normal",
    # Autoencoder checkpoint used for latent matching
    "3d_autoencoder_path": f"./{AUTOENCODER_EXP}/model_autoencoder_final.pth",
    # Optimisation schedule
    "resume_ckpt": None,
    "learning_rate_model": 5e-5,
    "max_epochs": 30,
    "save_every_n": 1,
    "validate_every_n": 3,
    # Autoencoder sizes (output = 2048 points x 3 coordinates)
    "autoencoder_bottleneck": 512,
    "autoencoder_hidden_size": 256,
    "autoencoder_output_size": 2048 * 3,
}

train_2d_to_3d.main(config)

## Variant II - Probabilistic Latent Matching

In [33]:
!mkdir "{ENCODER_EXP}/DIV"

In [None]:
from training import train_2d_to_3d

# Variant II: "variational" encoder head trained with the "variational" criterion.
config = {
    # Output location and hardware
    "experiment_name": f"./{ENCODER_EXP}/DIV",
    "device": "cuda:0",  # run this on a gpu for a reasonable training time
    # Encoder / data settings
    "bottleneck": 512,
    "cat": 1,
    "batch_size": 32,
    "loss_criterion": "variational",
    "final_layer": "variational",
    # Autoencoder checkpoint used for latent matching
    "3d_autoencoder_path": f"./{AUTOENCODER_EXP}/model_autoencoder_final.pth",
    # Optimisation schedule
    "resume_ckpt": None,
    "learning_rate_model": 5e-5,
    "max_epochs": 20,
    "save_every_n": 1,
    "validate_every_n": 3,
    # Autoencoder sizes (output = 2048 points x 3 coordinates)
    "autoencoder_bottleneck": 512,
    "autoencoder_hidden_size": 256,
    "autoencoder_output_size": 2048 * 3,
    # Extra settings supplied only for this variant
    "alpha": 0.2,
    "penalty_angle": 20,
    "lambda": 5.5,
}

train_2d_to_3d.main(config)

# Inference
Infer pointclouds using the trained 2D Image Encoder and 3D Pointcloud Decoder models. 

**Important Note:** Before running these cells, you should download the pre-trained models and create a folder "trained_models" and move the models you want to use for inference to trained_models/.

## Variant I - Inferences

### 2D Image Encoder with L1

In [None]:
# If you want to infer with the models you trained above uncomment and run this cell

# EXP_NAME = f"{ENCODER_EXP}/L1/model_final.pth"
# AUTOENCODER_EXP = "3d_autoencoder"

In [40]:
# Run this cell to do inference with the pre-trained L1 encoder checkpoint.
EXP_NAME = "trained_models/model_epoch_L1_30.pth"

In [38]:
# Define the folders you want to save the results to

RESULTS = "image_encoder_L1_results"
PRED = "pred"
GT = "gt"

!mkdir "{RESULTS}"
!mkdir "{RESULTS}/{PRED}"
!mkdir "{RESULTS}/{GT}"
!mkdir "{RESULTS}/{GT}/pointcloud"
!mkdir "{RESULTS}/{GT}/image"

In [None]:
from inference import inference_2d_to_3d

# Inference settings for the Variant I encoder ("normal" head, L1 criterion).
config = {
    # Encoder checkpoint and hardware
    "encoder_path": f"./{EXP_NAME}",
    "device": "cuda:0",  # run this on a gpu for a reasonable inference time
    "bottleneck": 512,
    # Output folders for predictions and ground truth
    "2d_inference_pred": f"./{RESULTS}/{PRED}/",
    "2d_inference_gt_point": f"./{RESULTS}/{GT}/pointcloud/",
    "2d_inference_gt_img": f"./{RESULTS}/{GT}/image/",
    # Data / model variant
    "cat": 13,
    "batch_size": 1,
    "loss_criterion": "L1",
    "final_layer": "normal",
    # Autoencoder checkpoint and sizes (output = 2048 points x 3 coordinates)
    "3d_autoencoder_path": f"./{AUTOENCODER_EXP}/model_autoencoder_final.pth",
    "resume_ckpt": None,
    "learning_rate_model": 5e-5,
    "autoencoder_bottleneck": 512,
    "autoencoder_hidden_size": 256,
    "autoencoder_output_size": 2048 * 3,
}
inference_2d_to_3d.main(config)

### 2D Image Encoder with L2

In [41]:
# If you want to infer the model you trained above uncomment and run this cell

# EXP_NAME = f"{ENCODER_EXP}/L2/model_final.pth"

In [42]:
# Run this cell to do inference with the pre-trained L2 encoder checkpoint.

EXP_NAME = "trained_models/model_epoch_L2_30.pth"

In [None]:
# Define the folders you want to save the results to

RESULTS = "image_encoder_L2_results"
PRED = "pred"
GT = "gt"

!mkdir "{RESULTS}"
!mkdir "{RESULTS}/{PRED}"
!mkdir "{RESULTS}/{GT}"
!mkdir "{RESULTS}/{GT}/pointcloud"
!mkdir "{RESULTS}/{GT}/image"

In [None]:
from inference import inference_2d_to_3d

# Inference settings for the Variant I encoder ("normal" head, L2 criterion).
config = {
    # Encoder checkpoint and hardware
    "encoder_path": f"./{EXP_NAME}",
    "device": "cuda:0",  # run this on a gpu for a reasonable inference time
    "bottleneck": 512,
    # Output folders for predictions and ground truth
    "2d_inference_pred": f"./{RESULTS}/{PRED}/",
    "2d_inference_gt_point": f"./{RESULTS}/{GT}/pointcloud/",
    "2d_inference_gt_img": f"./{RESULTS}/{GT}/image/",
    # Data / model variant
    "cat": 13,
    "batch_size": 1,
    "loss_criterion": "L2",
    "final_layer": "normal",
    # Autoencoder checkpoint and sizes (output = 2048 points x 3 coordinates)
    "3d_autoencoder_path": f"./{AUTOENCODER_EXP}/model_autoencoder_final.pth",
    "resume_ckpt": None,
    "learning_rate_model": 5e-5,
    "autoencoder_bottleneck": 512,
    "autoencoder_hidden_size": 256,
    "autoencoder_output_size": 2048 * 3,
}
inference_2d_to_3d.main(config)

## Variant II - Inferences

In [None]:
# If you want to infer the model you trained above uncomment and run this cell

# EXP_NAME = f"{ENCODER_EXP}/DIV/model_final.pth"

In [47]:
# Run this cell to do inference with a pre-trained Variant II checkpoint.
# Keep exactly one EXP_NAME assignment active; the checkpoints are listed by
# lambda value (presumably the "lambda" used when training Variant II — confirm).

# If lambda = 5.5:
EXP_NAME = "trained_models/model_epoch_30_55.pth"

# If lambda = 0.5:
# EXP_NAME = "trained_models/model_epoch_30_05.pth"

# If lambda = 0.0:
# EXP_NAME = "trained_models/model_epoch_30_00.pth"

In [48]:
# Define the folders you want to save the results to

RESULTS = "image_encoder_DIV_results"
PRED = "pred"
GT = "gt"

!mkdir "{RESULTS}"
!mkdir "{RESULTS}/{PRED}"
!mkdir "{RESULTS}/{GT}"
!mkdir "{RESULTS}/{GT}/pointcloud"
!mkdir "{RESULTS}/{GT}/image"

In [None]:
from inference import inference_2d_to_3d

# Inference settings for the Variant II encoder ("variational" head and criterion).
config = {
    # Encoder checkpoint and hardware
    "encoder_path": f"./{EXP_NAME}",
    "device": "cuda:0",  # run this on a gpu for a reasonable inference time
    "is_overfit": False,
    "bottleneck": 512,
    # Output folders for predictions and ground truth
    "2d_inference_pred": f"./{RESULTS}/{PRED}/",
    "2d_inference_gt_point": f"./{RESULTS}/{GT}/pointcloud/",
    "2d_inference_gt_img": f"./{RESULTS}/{GT}/image/",
    # Data / model variant
    "cat": 1,
    "batch_size": 1,
    "loss_criterion": "variational",
    "final_layer": "variational",
    # Autoencoder checkpoint and sizes (output = 2048 points x 3 coordinates)
    "3d_autoencoder_path": f"./{AUTOENCODER_EXP}/model_autoencoder_final.pth",
    "resume_ckpt": None,
    "learning_rate_model": 5e-5,
    "autoencoder_bottleneck": 512,
    "autoencoder_hidden_size": 256,
    "autoencoder_output_size": 2048 * 3,
}
inference_2d_to_3d.main(config)

## 3D Point Cloud Inferences

In [None]:
# If you want to infer the model you trained above uncomment and run this cell

# EXP_NAME = f"{AUTOENCODER_EXP}/model_autoencoder_final.pth"

In [51]:
# Run this cell to do inference with the pre-trained autoencoder checkpoint.

EXP_NAME = "trained_models/model_autoencoder_final.pth"

In [52]:
# Define the folders you want to save the results to

RESULTS = "autoencoder_results"
PRED = "pred"
GT = "gt"

!mkdir "{RESULTS}"
!mkdir "{RESULTS}/{PRED}"
!mkdir "{RESULTS}/{GT}"

In [None]:
from inference import infer_3d

# Settings handed to the point cloud autoencoder inference entry point.
config = dict(
    # Checkpoint to load and folders that receive ground truth / predictions
    autoencoder=f"./{EXP_NAME}",
    infer_gt=f"./{RESULTS}/{GT}/",
    infer_pred=f"./{RESULTS}/{PRED}/",
    # Data loading
    bottleneck=512,
    batch_size=1,
    num_workers=4,
    # Network sizes (output_size = 2048 points x 3 coordinates)
    input_size=256,
    hidden_size=256,
    output_size=2048 * 3,
)
infer_3d.main(config)

# Visualize Reconstructed Point Clouds
After the inference, you can use these cells to render the point clouds

In [54]:
import os
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Util function for loading point clouds
import numpy as np

# Data structures and functions for rendering
from PIL import Image
from pytorch3d.structures import Pointclouds
from pytorch3d.vis.plotly_vis import AxisArgs, plot_batch_individually, plot_scene
from pytorch3d.renderer import (
    look_at_view_transform,
    FoVOrthographicCameras, 
    PointsRasterizationSettings,
    PointsRenderer,
    PulsarPointsRenderer,
    PointsRasterizer,
    AlphaCompositor,
    NormWeightedCompositor,
    FoVPerspectiveCameras
)

# Setup: use the first GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Make the chosen GPU the current CUDA device.
    torch.cuda.set_device(device)

In [55]:
# Initialize a camera.
R, T = look_at_view_transform(dist=20, elev=14, azim=-46)
cameras = FoVOrthographicCameras(device=device, R=R, T=T, znear=0.01)

# Define the settings for rasterization. The output image is 512x512, each point
# is drawn with radius 0.005, and 10 points per pixel are tracked for compositing.
raster_settings = PointsRasterizationSettings(
    image_size=512,
    radius=0.005,
    points_per_pixel=10,
)

# Create a points renderer by compositing points using a weighted compositor (3D points are
# weighted according to their distance to a pixel and accumulated using a weighted sum).
# The white background is given on the same 0-1 scale as the point colours, so the
# rendered image needs no clipping when it is shown with imshow.
renderer = PointsRenderer(
    rasterizer=PointsRasterizer(cameras=cameras, raster_settings=raster_settings),
    compositor=NormWeightedCompositor(background_color=(1.0, 1.0, 1.0)),
)

In [None]:
# Here provide the paths of reconstructed point clouds and images. The paths shouldn't contain the extension ".npy"
INPUT_IMG_PATH = "..."
GT_POINT_CLOUD_PATH = "..."
PREDICTED_POINT_CLOUD_PATH = "..."

file_names = [GT_POINT_CLOUD_PATH, PREDICTED_POINT_CLOUD_PATH]

# Load and save the input image
image = np.load(f"{INPUT_IMG_PATH}.npy")
# Take the first entry and move the channel axis last before handing it to PIL.
im = Image.fromarray(image[0].transpose(1, 2, 0).astype(np.uint8))
im.save(f"{INPUT_IMG_PATH}.png")


# Load, render and save each point cloud
for name in file_names:
    # Named `cloud` rather than `input` so the builtin input() is not shadowed.
    cloud = np.load(f'{name}.npy')

    # Move the coordinate axis last when the array is not already (..., 3).
    if cloud.shape[2] != 3:
        cloud = cloud.transpose(0, 2, 1)

    x = cloud[0, :, 0]
    y = cloud[0, :, 1]
    z = cloud[0, :, 2]

    # Re-order the axes to (y, -x, z) before rendering.
    pts = np.stack((y, -x, z), axis=1)
    verts = torch.Tensor(pts).to(device)
    # You can change the color of point clouds here.
    # The colours must live on the same device as the points.
    rgb = (torch.tensor([72.45, 251.85, 528]) * torch.ones(pts.shape) / 1000.).to(device)

    point_cloud = Pointclouds(points=[verts], features=[rgb])

    images = renderer(point_cloud)
    plt.figure(figsize=(9, 11))
    plt.imshow(images[0, ..., :3].cpu().numpy())
    plt.axis("off")
    plt.tight_layout()
    # Save rendered point cloud image
    plt.savefig(f'{name}.png', dpi=300)