In [None]:
import sys
sys.path.insert(0, '/home/xp/stereo_toolbox/')

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
torch.backends.cudnn.benchmark = True
import matplotlib.pyplot as plt
import argparse

# auto reload modules
%load_ext autoreload
%autoreload 2

from stereo_toolbox.datasets import *
from stereo_toolbox.models import *
from stereo_toolbox.evaluation import *

In [None]:
## PSMNet
# Build PSMNet and restore its weights from the SceneFlow checkpoint below.
# NOTE(review): the third argument ('state_dict') is presumably the key under
# which the weights are stored inside the checkpoint file -- confirm against
# load_checkpoint_flexible.
model = load_checkpoint_flexible(
    PSMNet(),
    '/home/xp/stereo_toolbox/stereo_toolbox/models/PSMNet/pretrained_sceneflow_new.tar',
    'state_dict',
)

# SceneFlow test -- reference numbers noted for the (disabled) call below:
#   EPE: 1.1572px, 1px: 11.2908%, 2px: 6.4028%, 3px: 4.7803%
# sf_res = sceneflow_test(model, device='cuda:0')

# Cross-dataset generalization -- reference numbers noted for the (disabled)
# call below. EPE is an end-point error in pixels; the outlier rates
# (OCC / NOC / ALL) are percentages.
#   KITTI2015_Dataset EPE: 4.0584px, OCC: 47.6432%, NOC: 28.1250%, ALL: 28.4160%.
#   KITTI2012_Dataset EPE: 3.8022px, OCC: 63.1951%, NOC: 26.5022%, ALL: 27.3239%.
#   MiddleburyEval3_Dataset EPE: 9.8662px, OCC: 62.2950%, NOC: 30.1842%, ALL: 34.5084%.
#   ETH3D_Dataset EPE: 2.3997px, OCC: 28.5613%, NOC: 14.7393%, ALL: 15.3888%.
# gen_res = generalization_eval(model, device='cuda:0')

# Speed / memory benchmark -- reference numbers noted for the call below:
#   Resolution: (480, 640), Avg Time: 0.0396 s, Avg Frequency: 25.2630 Hz,Avg Memory: 1787.69 MB
#   Resolution: (736, 1280), Avg Time: 0.1245 s, Avg Frequency: 8.0331 Hz,Avg Memory: 4956.50 MB
#   Resolution: (1088, 1920), Avg Time: 0.2866 s, Avg Frequency: 3.4892 Hz,Avg Memory: 10687.22 MB
speed_memory_res = speed_and_memory_test(model, device='cuda:1')

In [None]:
## GwcNet_GC
# Build GwcNet_GC and restore its weights from the SceneFlow checkpoint below.
# NOTE(review): the third argument ('model') is presumably the key under which
# the weights are stored inside the checkpoint file -- confirm against
# load_checkpoint_flexible.
model = load_checkpoint_flexible(
    GwcNet_GC(),
    '/home/xp/stereo_toolbox/stereo_toolbox/models/GwcNet/sceneflow/gwcnet-gc/checkpoint_000015.ckpt',
    'model',
)

# SceneFlow test -- reference numbers noted for the (disabled) call below:
#   EPE: 0.9514px, 1px: 8.1138%, 2px: 4.6241%, 3px: 3.4730%
# sf_res = sceneflow_test(model, device='cuda:0')

# Cross-dataset generalization -- reference numbers noted for the (disabled)
# call below:
#   KITTI2015_Dataset EPE: 2.3801px, OCC: 29.0696%, NOC: 12.1746%, ALL: 12.5331%.
#   KITTI2012_Dataset EPE: 1.7062px, OCC: 45.6458%, NOC: 11.9081%, ALL: 12.6712%.
#   MiddleburyEval3_Dataset EPE: 6.0044px, OCC: 47.1304%, NOC: 20.4144%, ALL: 24.1094%.
#   ETH3D_Dataset EPE: 1.9213px, OCC: 21.3749%, NOC: 10.4911%, ALL: 11.0878%.
# gen_res = generalization_eval(model, device='cuda:0')

# Speed / memory benchmark -- reference numbers noted for the call below:
#   Resolution: (480, 640), Avg Time: 0.0386 s, Avg Frequency: 25.8942 Hz,Avg Memory: 1882.58 MB
#   Resolution: (736, 1280), Avg Time: 0.1326 s, Avg Frequency: 7.5442 Hz,Avg Memory: 5251.74 MB
#   Resolution: (1088, 1920), Avg Time: 0.3093 s, Avg Frequency: 3.2330 Hz,Avg Memory: 11326.84 MB
speed_memory_res = speed_and_memory_test(model, device='cuda:1')

In [None]:
## CFNet
# Build CFNet and restore its weights from the SceneFlow checkpoint below.
# NOTE(review): the third argument ('model') is presumably the key under which
# the weights are stored inside the checkpoint file -- confirm against
# load_checkpoint_flexible.
model = load_checkpoint_flexible(
    CFNet(),
    '/home/xp/stereo_toolbox/stereo_toolbox/models/CFNet/sceneflow_pretraining.ckpt',
    'model',
)

# SceneFlow test -- reference numbers noted for the (disabled) call below:
#   EPE: 1.2879px, 1px: 10.7195%, 2px: 7.3116%, 3px: 5.9251%
# sf_res = sceneflow_test(model, device='cuda:0')

# Cross-dataset generalization -- reference numbers noted for the (disabled)
# call below:
#   KITTI2015_Dataset EPE: 1.9798px, OCC: 16.4189%, NOC: 5.8712%, ALL: 6.0967%.
#   KITTI2012_Dataset EPE: 1.0334px, OCC: 30.2510%, NOC: 4.5758%, ALL: 5.1527%.
#   MiddleburyEval3_Dataset EPE: 5.7162px, OCC: 44.5492%, NOC: 16.3307%, ALL: 20.2219%.
#   ETH3D_Dataset EPE: 0.5862px, OCC: 11.8926%, NOC: 5.5666%, ALL: 5.8700%.
# gen_res = generalization_eval(model, device='cuda:0')

# Speed / memory benchmark -- reference numbers noted for the call below:
#   Resolution: (480, 640), Avg Time: 0.0481 s, Avg Frequency: 20.7859 Hz,Avg Memory: 1966.13 MB
#   Resolution: (736, 1280), Avg Time: 0.1434 s, Avg Frequency: 6.9758 Hz,Avg Memory: 5374.05 MB
#   Resolution: (1088, 1920), Avg Time: 0.3343 s, Avg Frequency: 2.9913 Hz,Avg Memory: 11526.54 MB
speed_memory_res = speed_and_memory_test(model, device='cuda:1')

In [None]:
## PCWNet_GC
# Build PCWNet_GC and restore its weights from the SceneFlow checkpoint below.
# NOTE(review): the third argument ('model') is presumably the key under which
# the weights are stored inside the checkpoint file -- confirm against
# load_checkpoint_flexible.
model = load_checkpoint_flexible(
    PCWNet_GC(),
    '/home/xp/stereo_toolbox/stereo_toolbox/models/PCWNet/PCWNet_sceneflow_pretrain.ckpt',
    'model',
)

# SceneFlow test -- reference numbers noted for the (disabled) call below:
#   EPE: 1.0391px, 1px: 8.1380%, 2px: 4.6462%, 3px: 3.5443%
# sf_res = sceneflow_test(model, device='cuda:0')

# Cross-dataset generalization -- reference numbers noted for the (disabled)
# call below:
#   KITTI2015_Dataset EPE: 1.7777px, OCC: 14.9532%, NOC: 5.5273%, ALL: 5.7416%.
#   KITTI2012_Dataset EPE: 0.9589px, OCC: 30.2184%, NOC: 4.0734%, ALL: 4.6669%.
#   MiddleburyEval3_Dataset EPE: 3.1463px, OCC: 37.9880%, NOC: 12.1703%, ALL: 15.8633%.
#   ETH3D_Dataset EPE: 0.5284px, OCC: 11.6673%, NOC: 5.2792%, ALL: 5.5360%.
# gen_res = generalization_eval(model, device='cuda:0')

# Speed / memory benchmark -- reference numbers noted for the call below:
#   Resolution: (480, 640), Avg Time: 0.0888 s, Avg Frequency: 11.2559 Hz,Avg Memory: 3067.07 MB
#   Resolution: (736, 1280), Avg Time: 0.2769 s, Avg Frequency: 3.6116 Hz,Avg Memory: 8629.70 MB
#   Resolution: (1088, 1920), Avg Time: 0.6419 s, Avg Frequency: 1.5580 Hz,Avg Memory: 18680.02 MB
speed_memory_res = speed_and_memory_test(model, device='cuda:1')

In [None]:
## RAFTStereo
# Build RAFTStereo and restore its weights from the SceneFlow checkpoint below.
# No third argument is passed here, so load_checkpoint_flexible falls back to
# whatever its default handling is (not visible in this notebook).
model = load_checkpoint_flexible(
    RAFTStereo(),
    '/home/xp/stereo_toolbox/stereo_toolbox/models/RAFTStereo/raftstereo-sceneflow.pth',
)

# SceneFlow test -- reference numbers noted for the (disabled) call below:
#   EPE: 0.7863px, 1px: 7.7104%, 2px: 4.8658%, 3px: 3.7327%
# sf_res = sceneflow_test(model, device='cuda:0')

# Cross-dataset generalization -- reference numbers noted for the (disabled)
# call below:
#   KITTI2015_Dataset EPE: 1.1283px, OCC: 12.6979%, NOC: 5.3413%, ALL: 5.5269%.
#   KITTI2012_Dataset EPE: 0.9098px, OCC: 28.3453%, NOC: 4.2900%, ALL: 4.8351%.
#   MiddleburyEval3_Dataset EPE: 1.5231px, OCC: 27.9966%, NOC: 9.0575%, ALL: 11.9563%.
#   ETH3D_Dataset EPE: 0.3614px, OCC: 6.0158%, NOC: 2.8471%, ALL: 3.0412%.
# gen_res = generalization_eval(model, device='cuda:0')

# Speed / memory benchmark -- reference numbers noted for the call below:
#   Resolution: (480, 640), Avg Time: 0.1967 s, Avg Frequency: 5.0848 Hz,Avg Memory: 914.25 MB
#   Resolution: (736, 1280), Avg Time: 0.3624 s, Avg Frequency: 2.7596 Hz,Avg Memory: 2227.85 MB
#   Resolution: (1088, 1920), Avg Time: 0.7613 s, Avg Frequency: 1.3136 Hz,Avg Memory: 4598.91 MB
speed_memory_res = speed_and_memory_test(model, device='cuda:1')

In [None]:
## IGEVStereo
# Build IGEVStereo and restore its weights from the SceneFlow checkpoint below.
# No third argument is passed here, so load_checkpoint_flexible falls back to
# whatever its default handling is (not visible in this notebook).
model = load_checkpoint_flexible(
    IGEVStereo(),
    '/home/xp/stereo_toolbox/stereo_toolbox/models/IGEVStereo/sceneflow.pth',
)

# SceneFlow test -- reference numbers noted for the (disabled) call below:
#   EPE: 0.6790px, 1px: 5.7491%, 2px: 3.7320%, 3px: 2.9069%
# sf_res = sceneflow_test(model, device='cuda:0')

# Cross-dataset generalization -- reference numbers noted for the (disabled)
# call below:
#   KITTI2015_Dataset EPE: 1.1868px, OCC: 14.2606%, NOC: 5.5951%, ALL: 5.7924%.
#   KITTI2012_Dataset EPE: 1.0131px, OCC: 33.6624%, NOC: 4.9248%, ALL: 5.5936%.
#   MiddleburyEval3_Dataset EPE: 1.5491px, OCC: 24.2787%, NOC: 7.2518%, ALL: 9.9079%.
#   ETH3D_Dataset EPE: 0.7400px, OCC: 9.7601%, NOC: 4.0635%, ALL: 4.3856%.
# gen_res = generalization_eval(model, device='cuda:0')

# Speed / memory benchmark -- reference numbers noted for the call below:
#   Resolution: (480, 640), Avg Time: 0.2363 s, Avg Frequency: 4.2314 Hz,Avg Memory: 686.43 MB
#   Resolution: (736, 1280), Avg Time: 0.3501 s, Avg Frequency: 2.8563 Hz,Avg Memory: 1504.02 MB
#   Resolution: (1088, 1920), Avg Time: 0.6741 s, Avg Frequency: 1.4835 Hz,Avg Memory: 2988.35 MB
speed_memory_res = speed_and_memory_test(model, device='cuda:1')

In [None]:
## MonSter
# Build Monster and restore its weights from the SceneFlow checkpoint below.
# No third argument is passed here, so load_checkpoint_flexible falls back to
# whatever its default handling is (not visible in this notebook).
model = load_checkpoint_flexible(
    Monster(),
    '/home/xp/stereo_toolbox/stereo_toolbox/models/MonSter/sceneflow.pth',
)

# SceneFlow test -- reference numbers noted for the (disabled) call below,
# listed for two input normalization schemes.
#   ImageNet normalization
#     EPE: 0.7192px, 1px: 7.1517%, 2px: 4.2557%, 3px: 3.2055%
#   [-1, 1] normalization
#     EPE: 0.5201px, 1px: 4.5608%, 2px: 2.9705%, 3px: 2.3052%
# sf_res = sceneflow_test(model, device='cuda:0')

# Cross-dataset generalization -- reference numbers noted for the (disabled)
# call below, listed for the same two normalization schemes.
#   ImageNet normalization
#     KITTI2015_Dataset EPE: 0.8937px, OCC: 9.6511%, NOC: 3.3399%, ALL: 3.4776%.
#     KITTI2012_Dataset EPE: 0.7394px, OCC: 19.1296%, NOC: 3.0212%, ALL: 3.3673%.
#     MiddleburyEval3_Dataset EPE: 0.8573px, OCC: 18.7195%, NOC: 5.3854%, ALL: 7.2969%.
#     ETH3D_Dataset EPE: 0.2371px, OCC: 3.4634%, NOC: 1.2399%, ALL: 1.3651%.
#   [-1, 1] normalization
#     KITTI2015_Dataset EPE: 0.8884px, OCC: 9.6433%, NOC: 3.3003%, ALL: 3.4495%.
#     KITTI2012_Dataset EPE: 0.7334px, OCC: 18.8246%, NOC: 3.0310%, ALL: 3.3710%.
#     MiddleburyEval3_Dataset EPE: 0.9325px, OCC: 18.4153%, NOC: 5.8567%, ALL: 7.6997%.
#     ETH3D_Dataset EPE: 0.2724px, OCC: 3.5259%, NOC: 1.3234%, ALL: 1.4525%.
# gen_res = generalization_eval(model, device='cuda:0')

# Speed / memory benchmark -- reference numbers noted for the call below:
#   Resolution: (480, 640), Avg Time: 0.3375 s, Avg Frequency: 2.9633 Hz,Avg Memory: 2399.86 MB
#   Resolution: (736, 1280), Avg Time: 0.7188 s, Avg Frequency: 1.3912 Hz,Avg Memory: 3841.63 MB
#   Resolution: (1088, 1920), Avg Time: 1.8735 s, Avg Frequency: 0.5338 Hz,Avg Memory: 6537.50 MB
speed_memory_res = speed_and_memory_test(model, device='cuda:1')

In [None]:
# DEFOMStereo (ViT-S)
# Build DEFOMStereo with its default constructor arguments and restore its
# weights from the ViT-S SceneFlow checkpoint below. No third argument is
# passed, so load_checkpoint_flexible falls back to whatever its default
# handling is (not visible in this notebook).
model = load_checkpoint_flexible(
    DEFOMStereo(),
    '/home/xp/stereo_toolbox/stereo_toolbox/models/DEFOMStereo/defomstereo_vits_sceneflow.pth',
)

# SceneFlow test -- reference numbers noted for the (disabled) call below:
#   EPE: 0.5592px, 1px: 5.9396%, 2px: 3.7223%, 3px: 2.8441%
# sf_res = sceneflow_test(model, device='cuda:0')

# Cross-dataset generalization -- reference numbers noted for the (disabled)
# call below:
#   KITTI2015_Dataset EPE: 1.0819px, OCC: 13.6233%, NOC: 4.9982%, ALL: 5.1943%.
#   KITTI2012_Dataset EPE: 0.9024px, OCC: 23.5715%, NOC: 4.3982%, ALL: 4.8102%.
#   MiddleburyEval3_Dataset EPE: 1.9487px, OCC: 23.8614%, NOC: 6.0614%, ALL: 8.7609%.
#   ETH3D_Dataset EPE: 0.2733px, OCC: 4.9148%, NOC: 2.0263%, ALL: 2.1937%.
# gen_res = generalization_eval(model, device='cuda:0')

# Speed / memory benchmark -- reference numbers noted for the call below:
#   Resolution: (480, 640), Avg Time: 0.1957 s, Avg Frequency: 5.1107 Hz,Avg Memory: 1062.00 MB
#   Resolution: (736, 1280), Avg Time: 0.3423 s, Avg Frequency: 2.9217 Hz,Avg Memory: 2424.38 MB
#   Resolution: (1088, 1920), Avg Time: 0.8829 s, Avg Frequency: 1.1326 Hz,Avg Memory: 4886.10 MB
speed_memory_res = speed_and_memory_test(model, device='cuda:1')

In [None]:
# DEFOMStereo (ViT-L)
# Build DEFOMStereo with dinov2_encoder='vitl' (passed through an
# argparse.Namespace as the `args` keyword) and restore its weights from the
# ViT-L SceneFlow checkpoint below. No third argument is passed to
# load_checkpoint_flexible, so it falls back to whatever its default handling
# is (not visible in this notebook).
model = load_checkpoint_flexible(
    DEFOMStereo(args=argparse.Namespace(dinov2_encoder='vitl')),
    '/home/xp/stereo_toolbox/stereo_toolbox/models/DEFOMStereo/defomstereo_vitl_sceneflow.pth',
)

# SceneFlow test -- reference numbers noted for the (disabled) call below:
#   EPE: 0.4832px, 1px: 5.4918%, 2px: 3.4421%, 3px: 2.6136%
# sf_res = sceneflow_test(model, device='cuda:0')

# Cross-dataset generalization -- reference numbers noted for the (disabled)
# call below:
#   KITTI2015_Dataset EPE: 1.0725px, OCC: 12.5722%, NOC: 4.7921%, ALL: 4.9853%.
#   KITTI2012_Dataset EPE: 0.8433px, OCC: 21.9474%, NOC: 3.8260%, ALL: 4.2137%.
#   MiddleburyEval3_Dataset EPE: 0.8884px, OCC: 20.6396%, NOC: 4.3891%, ALL: 6.9092%.
#   ETH3D_Dataset EPE: 0.2533px, OCC: 5.1446%, NOC: 2.0820%, ALL: 2.2437%.
# gen_res = generalization_eval(model, device='cuda:0')

# Speed / memory benchmark -- reference numbers noted for the call below:
#   Resolution: (480, 640), Avg Time: 0.2483 s, Avg Frequency: 4.0273 Hz,Avg Memory: 2451.85 MB
#   Resolution: (736, 1280), Avg Time: 0.5966 s, Avg Frequency: 1.6761 Hz,Avg Memory: 4005.69 MB
#   Resolution: (1088, 1920), Avg Time: 1.7410 s, Avg Frequency: 0.5744 Hz,Avg Memory: 6816.45 MB
speed_memory_res = speed_and_memory_test(model, device='cuda:1')

In [None]:
## STTR
# Build STTR and restore its weights from the SceneFlow checkpoint below.
# NOTE(review): the third argument ('state_dict') is presumably the key under
# which the weights are stored inside the checkpoint file -- confirm against
# load_checkpoint_flexible.
model = load_checkpoint_flexible(
    STTR(),
    '/home/xp/stereo_toolbox/stereo_toolbox/models/STTR/sceneflow_pretrained_model.pth.tar',
    'state_dict',
)

# SceneFlow test -- reference numbers noted for the (disabled) call below:
#   EPE: 4.5613px, 1px: 15.6220%, 2px: 12.3084%, 3px: 11.3189%
# sf_res = sceneflow_test(model, device='cuda:0')

# Cross-dataset generalization -- reference numbers noted for the (disabled)
# call below; the Middlebury entry is recorded as OOM instead of metrics.
#   KITTI2015_Dataset EPE: 2.1786px, OCC: 90.9327%, NOC: 6.8101%, ALL: 8.3029%.
#   KITTI2012_Dataset EPE: 2.8117px, OCC: 94.3034%, NOC: 7.1706%, ALL: 9.1719%.
#   MiddleburyEval3_Dataset OOM
#   ETH3D_Dataset EPE: 2.2964px, OCC: 50.0450%, NOC: 15.8716%, ALL: 17.5654%.
# gen_res = generalization_eval(model, device='cuda:1')

# Speed / memory benchmark -- reference numbers noted for the call below; the
# largest resolution is recorded as OOM instead of timings.
#   Resolution: (480, 640), Avg Time: 0.1556 s, Avg Frequency: 6.4286 Hz,Avg Memory: 3036.80 MB
#   Resolution: (736, 1280), Avg Time: 0.8468 s, Avg Frequency: 1.1808 Hz,Avg Memory: 16588.08 MB
#   Resolution: (1088, 1920), OOM
speed_memory_res = speed_and_memory_test(model, device='cuda:2')


In [None]:
## ACVNet
# Build ACVNet and restore its weights from the SceneFlow checkpoint below.
# NOTE(review): the third argument ('model') is presumably the key under which
# the weights are stored inside the checkpoint file -- confirm against
# load_checkpoint_flexible.
model = load_checkpoint_flexible(
    ACVNet(),
    '/home/xp/stereo_toolbox/stereo_toolbox/models/ACVNet/sceneflow.ckpt',
    'model',
)

# SceneFlow test (this call is live in this cell) -- reference numbers:
#   EPE: 0.6860px, 1px: 5.1409%, 2px: 2.9201%, 3px: 2.1832%
sf_res = sceneflow_test(model, device='cuda:0')

# Cross-dataset generalization (this call is live in this cell) -- reference
# numbers:
#   KITTI2015_Dataset EPE: 2.5105px, OCC: 32.8509%, NOC: 11.2934%, ALL: 11.7108%.
#   KITTI2012_Dataset EPE: 2.0233px, OCC: 54.4658%, NOC: 12.9433%, ALL: 13.8876%.
#   MiddleburyEval3_Dataset EPE: 6.2429px, OCC: 47.3617%, NOC: 22.0709%, ALL: 25.6607%.
#   ETH3D_Dataset EPE: 2.4436px, OCC: 19.6435%, NOC: 8.6531%, ALL: 9.1933%.
gen_res = generalization_eval(model, device='cuda:1')

# Speed / memory benchmark -- reference numbers noted for the call below:
#   Resolution: (480, 640), Avg Time: 0.0494 s, Avg Frequency: 20.2593 Hz,Avg Memory: 2098.31 MB
#   Resolution: (736, 1280), Avg Time: 0.1664 s, Avg Frequency: 6.0091 Hz,Avg Memory: 6344.20 MB
#   Resolution: (1088, 1920), Avg Time: 0.3848 s, Avg Frequency: 2.5986 Hz,Avg Memory: 14021.82 MB
speed_memory_res = speed_and_memory_test(model, device='cuda:2')