
```shell script
python3 download_data.py [3dod/upsampling/raw] --split [Training/Validation] --video_id video_id1 video_id2 \
--download_dir YOUR_DATA_FOLDER

python3 download_data.py [3dod/upsampling/raw] --video_id_csv CSV_PATH \
--download_dir YOUR_DATA_FOLDER

```


```bash
python3 download_data.py raw --video_id_csv raw/test.csv --download_dir ../datasets/ARKitScenes/ --raw_dataset_assets lowres_wide.traj lowres_wide lowres_wide_intrinsics lowres_depth
```
```bash
python3 download_data.py raw --video_id_csv raw/test.csv --download_dir ../datasets/ARKitScenes/ --raw_dataset_assets vga_wide vga_wide_intrinsics
```

- mov：下载 iPhone ARKit 录制的视频，你可以用 ffmpeg 抽帧得到 RGB png

- highres_depth / lowres_depth / confidence：高精度 FARO 深度、Apple LiDAR 低分辨率深度及其置信度。

- lowres_wide_intrinsics、vga_wide_intrinsics：分别是 iPhone 主摄和副摄的相机内参。

- lowres_wide.traj：低分辨率 LiDAR + 主摄的外参轨迹（时间戳对应视频帧）。


```bash
wget -O model.ckpt https://huggingface.co/depth-anything/prompt-depth-anything-vitl/resolve/main/model.ckpt
```

In [3]:
import shutil
from pathlib import Path

# Assets that must all be present for a raw scene to be usable. A scene that
# lacks any one of them is deleted as a whole.
REQUIRED_ASSETS = (
    "lowres_depth",
    "lowres_wide",
    "lowres_wide_intrinsics",
    "lowres_wide.traj",
)

path1 = Path("datasets/ARKitScenes/raw/Validation")
path2 = Path("datasets/ARKitScenes/raw/Training")


def remove_incomplete_scenes(split_dir, required_assets=REQUIRED_ASSETS):
    """Delete every scene directory in *split_dir* that lacks a required asset.

    Args:
        split_dir (Path): Directory whose immediate sub-directories are scenes.
        required_assets (Iterable[str]): Names (files or folders) that must all
            exist directly inside a scene directory for it to be kept.

    Returns:
        list[str]: Names of the scene directories that were removed.

    Raises:
        FileNotFoundError: If *split_dir* does not exist.
    """
    removed = []
    # Take a snapshot of the listing before deleting anything, so removals
    # cannot interfere with the directory iteration.
    for scene_dir in list(split_dir.iterdir()):
        if not scene_dir.is_dir():
            continue
        # To additionally strip an unwanted per-scene folder (for example
        # "vga_wide_intrinsics"), remove it here with shutil.rmtree.
        if all((scene_dir / asset).exists() for asset in required_assets):
            continue
        print(f"删除无效场景：{scene_dir.name}")
        shutil.rmtree(scene_dir)
        removed.append(scene_dir.name)
    return removed


# The clean-up is destructive, so only run it when this code is executed
# directly (script or notebook cell), never as a side effect of an import.
if __name__ == "__main__":
    for split_dir in (path1, path2):
        remove_incomplete_scenes(split_dir)


删除无效场景：41069165
删除无效场景：41048071


In [8]:
import shutil
from pathlib import Path

root = Path("datasets/ARKitScenes")

# Only the Training split is merged by this cell. To merge the Validation
# split instead, replace "Training" with "Validation" in both paths below.
src_base = root / "raw" / "Training"
dst_base = root / "Training"

# Move the contents of every raw scene into the scene directory of the same
# name under dst_base, never overwriting anything that is already there.
for src_scene in src_base.iterdir():
    dst_scene = dst_base / src_scene.name
    # Handle only real scene directories that already have a counterpart in
    # the destination split; everything else is skipped silently.
    if not (src_scene.is_dir() and dst_scene.exists()):
        continue

    for entry in src_scene.iterdir():
        destination = dst_scene / entry.name
        if not destination.exists():
            shutil.move(str(entry), str(dst_scene))
        else:
            print(f"跳过已存在: {destination}")

    print(f"已处理场景：{src_scene.name}")


已处理场景：43896249
已处理场景：42898447
已处理场景：44796484
已处理场景：42899175
已处理场景：42897771
已处理场景：42897939
已处理场景：43649408
已处理场景：42898560
已处理场景：42899433
已处理场景：43828144
已处理场景：43896177
已处理场景：43649605
已处理场景：42444721
已处理场景：42898337
已处理场景：42897960
已处理场景：42899214
已处理场景：42898754
已处理场景：44796598
已处理场景：42897712
已处理场景：42899139
已处理场景：42898065
已处理场景：42898230
已处理场景：43649480
已处理场景：43828339
已处理场景：44358291
已处理场景：42897426
已处理场景：44796562
已处理场景：42899125
已处理场景：43896089
已处理场景：44796584
已处理场景：42445592
已处理场景：43828457
已处理场景：42898340
已处理场景：42445771
已处理场景：43649714
已处理场景：45260952
已处理场景：42899128
已处理场景：42445168
已处理场景：42898405
已处理场景：43649648
已处理场景：44358234
已处理场景：43649399
已处理场景：43896170
已处理场景：42899221
已处理场景：42444876
已处理场景：42899630
已处理场景：43828149
已处理场景：43649692
已处理场景：42899775
已处理场景：42445057
已处理场景：42898449
已处理场景：42447233
已处理场景：42445728
已处理场景：42899666
已处理场景：43895969
已处理场景：42899215
已处理场景：43828391
已处理场景：42897756
已处理场景：42447287
已处理场景：42899118
已处理场景：42898100
已处理场景：43649484
已处理场景：44796190
已处理场景：44796387
已处理场景：44796324
已处理场景：43649403
已处理场景：4244

In [None]:
import json
import random
from pathlib import Path

# —— 1. 参数区 —— 
# 改成你本地路径
# Change this to your local path.
TRAIN_ROOT = Path("datasets/ARKitScenes/Training")

# Number of context / target frames picked for every scene.
NUM_CONTEXT = 2
NUM_TARGET = 3
SEED = 42

# File extensions (lower-case) that count as frames.
FRAME_SUFFIXES = (".png", ".jpg")


def count_timestamped_frames(wide_dir):
    """Count the frames in *wide_dir* named ``<videoID>_<timestamp>.<ext>``.

    Files with another extension, without an underscore in the stem, or whose
    part after the first underscore is not a number are ignored.

    Args:
        wide_dir (Path): Directory holding the frame images of one scene.

    Returns:
        int: Number of files with a parseable timestamp.
    """
    count = 0
    for frame in wide_dir.iterdir():
        if frame.suffix.lower() not in FRAME_SUFFIXES:
            continue
        try:
            # e.g. stem "41048190_3644.288" -> timestamp 3644.288
            float(frame.stem.split("_", 1)[1])
        except (IndexError, ValueError):
            # IndexError: no "_" in the stem; ValueError: not a number.
            continue
        count += 1
    return count


def sample_scene_frames(wide_dir, num_context, num_target, rng):
    """Pick disjoint context and target frame indices for one scene.

    The indices are positions into the scene's valid frames ordered by
    ascending timestamp, so a consumer has to sort the frames the same way.

    Args:
        wide_dir (Path): The scene's ``wide`` frame directory.
        num_context (int): Number of context indices to draw.
        num_target (int): Number of target indices to draw.
        rng (random.Random): Random generator used for sampling.

    Returns:
        dict | None: ``{"context": [...], "target": [...]}`` with both lists
        sorted, or ``None`` if the directory is missing or holds fewer than
        ``num_context + num_target`` valid frames.
    """
    if not wide_dir.is_dir():
        return None
    n = count_timestamped_frames(wide_dir)
    if n < num_context + num_target:
        return None

    all_idx = list(range(n))
    context = sorted(rng.sample(all_idx, num_context))
    taken = set(context)
    # Targets are drawn from what is left, so they never repeat a context frame.
    remaining = [i for i in all_idx if i not in taken]
    target = sorted(rng.sample(remaining, num_target))
    return {"context": context, "target": target}


def build_scene_index(root, num_context=NUM_CONTEXT, num_target=NUM_TARGET, rng=None):
    """Build the ``scene name -> sampled frame indices`` mapping for *root*.

    Scenes are visited in sorted name order, which keeps the result
    reproducible for a given generator state.

    Args:
        root (Path): Directory whose sub-directories are scenes.
        num_context (int): Context frames per scene.
        num_target (int): Target frames per scene.
        rng (random.Random | None): Generator to sample with. Defaults to a
            fresh ``random.Random(SEED)``, which draws the same numbers as the
            global generator after ``random.seed(SEED)``.

    Returns:
        dict: One entry per scene directory; the value is ``None`` for scenes
        that cannot be sampled (see ``sample_scene_frames``).
    """
    if rng is None:
        rng = random.Random(SEED)
    return {
        scene_dir.name: sample_scene_frames(scene_dir / "wide", num_context, num_target, rng)
        for scene_dir in sorted(root.iterdir())
        if scene_dir.is_dir()
    }


# Build and write the index only when executed directly (script or notebook
# cell), so importing this code has no filesystem side effects.
if __name__ == "__main__":
    eval_index = build_scene_index(TRAIN_ROOT, NUM_CONTEXT, NUM_TARGET, random.Random(SEED))

    # Write the index as JSON next to the split directory.
    print(eval_index)
    out_path = TRAIN_ROOT.parent / "train_index_acid.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(eval_index, f)

    print(f"✅ 完成：共处理 {len(eval_index)} 个场景，文件保存在 {out_path}")

{'41048190': {'context': [3, 14], 'target': [30, 33, 37]}, '41048223': {'context': [6, 8], 'target': [5, 36, 39]}, '41048225': {'context': [2, 27], 'target': [1, 6, 14]}, '41048229': {'context': [14, 32], 'target': [0, 18, 20]}, '41048247': {'context': [3, 5], 'target': [4, 6, 7]}, '41048249': {'context': [3, 7], 'target': [0, 2, 5]}, '41048251': None, '41048262': {'context': [20, 54], 'target': [19, 36, 44]}, '41048264': {'context': [27, 43], 'target': [11, 13, 50]}, '41048265': {'context': [12, 45], 'target': [34, 46, 79]}, '42444474': {'context': [1, 14], 'target': [4, 13, 19]}, '42444477': {'context': [5, 35], 'target': [19, 42, 55]}, '42444490': {'context': [23, 36], 'target': [2, 4, 12]}, '42444499': {'context': [3, 10], 'target': [1, 4, 5]}, '42444501': {'context': [3, 12], 'target': [9, 11, 16]}, '42444503': {'context': [11, 26], 'target': [5, 6, 12]}, '42444511': {'context': [4, 10], 'target': [1, 12, 13]}, '42444512': {'context': [9, 10], 'target': [2, 3, 8]}, '42444513': {'c

In [None]:
import json
import random
from pathlib import Path

# —— 1. 参数区 —— 
# NOTE: despite its name, TRAIN_ROOT points at the Validation split in this
# cell; the name is kept so that code relying on it keeps working.
TRAIN_ROOT = Path("datasets/ARKitScenes/Validation")

# Number of context / target frames picked for every scene.
NUM_CONTEXT = 2
NUM_TARGET = 3
SEED = 42

# File extensions (lower-case) that count as frames.
FRAME_SUFFIXES = (".png", ".jpg")


def count_timestamped_frames(wide_dir):
    """Count the frames in *wide_dir* named ``<videoID>_<timestamp>.<ext>``.

    Files with another extension, without an underscore in the stem, or whose
    part after the first underscore is not a number are ignored.

    Args:
        wide_dir (Path): Directory holding the frame images of one scene.

    Returns:
        int: Number of files with a parseable timestamp.
    """
    count = 0
    for frame in wide_dir.iterdir():
        if frame.suffix.lower() not in FRAME_SUFFIXES:
            continue
        try:
            # e.g. stem "41048190_3644.288" -> timestamp 3644.288
            float(frame.stem.split("_", 1)[1])
        except (IndexError, ValueError):
            # IndexError: no "_" in the stem; ValueError: not a number.
            continue
        count += 1
    return count


def sample_scene_frames(wide_dir, num_context, num_target, rng):
    """Pick disjoint context and target frame indices for one scene.

    The indices are positions into the scene's valid frames ordered by
    ascending timestamp, so a consumer has to sort the frames the same way.

    Args:
        wide_dir (Path): The scene's ``wide`` frame directory.
        num_context (int): Number of context indices to draw.
        num_target (int): Number of target indices to draw.
        rng (random.Random): Random generator used for sampling.

    Returns:
        dict | None: ``{"context": [...], "target": [...]}`` with both lists
        sorted, or ``None`` if the directory is missing or holds fewer than
        ``num_context + num_target`` valid frames.
    """
    if not wide_dir.is_dir():
        return None
    n = count_timestamped_frames(wide_dir)
    if n < num_context + num_target:
        return None

    all_idx = list(range(n))
    context = sorted(rng.sample(all_idx, num_context))
    taken = set(context)
    # Targets are drawn from what is left, so they never repeat a context frame.
    remaining = [i for i in all_idx if i not in taken]
    target = sorted(rng.sample(remaining, num_target))
    return {"context": context, "target": target}


def build_scene_index(root, num_context=NUM_CONTEXT, num_target=NUM_TARGET, rng=None):
    """Build the ``scene name -> sampled frame indices`` mapping for *root*.

    Scenes are visited in sorted name order, which keeps the result
    reproducible for a given generator state.

    Args:
        root (Path): Directory whose sub-directories are scenes.
        num_context (int): Context frames per scene.
        num_target (int): Target frames per scene.
        rng (random.Random | None): Generator to sample with. Defaults to a
            fresh ``random.Random(SEED)``, which draws the same numbers as the
            global generator after ``random.seed(SEED)``.

    Returns:
        dict: One entry per scene directory; the value is ``None`` for scenes
        that cannot be sampled (see ``sample_scene_frames``).
    """
    if rng is None:
        rng = random.Random(SEED)
    return {
        scene_dir.name: sample_scene_frames(scene_dir / "wide", num_context, num_target, rng)
        for scene_dir in sorted(root.iterdir())
        if scene_dir.is_dir()
    }


# Build and write the index only when executed directly (script or notebook
# cell), so importing this code has no filesystem side effects.
if __name__ == "__main__":
    eval_index = build_scene_index(TRAIN_ROOT, NUM_CONTEXT, NUM_TARGET, random.Random(SEED))

    # Write the index as JSON next to the split directory.
    print(eval_index)
    out_path = TRAIN_ROOT.parent / "test_index_acid.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(eval_index, f)

    print(f"✅ 完成：共处理 {len(eval_index)} 个场景，文件保存在 {out_path}")

{'41069021': {'context': [0, 3], 'target': [9, 10, 18]}, '41069025': {'context': [4, 23], 'target': [3, 22, 25]}, '41069042': {'context': [2, 17], 'target': [1, 14, 20]}, '41069043': {'context': [1, 5], 'target': [8, 9, 18]}, '41069046': {'context': [1, 35], 'target': [13, 15, 27]}, '41069048': {'context': [7, 9], 'target': [0, 4, 6]}, '41069050': {'context': [2, 5], 'target': [3, 4, 6]}, '41069051': {'context': [4, 6], 'target': [1, 3, 12]}, '41142278': {'context': [3, 12], 'target': [9, 13, 21]}, '41142280': {'context': [1, 14], 'target': [2, 7, 9]}, '41142281': {'context': [2, 17], 'target': [10, 21, 22]}, '42444946': {'context': [27, 28], 'target': [6, 11, 18]}, '42444949': {'context': [2, 22], 'target': [1, 8, 23]}, '42444950': {'context': [5, 18], 'target': [3, 8, 29]}, '42444966': {'context': [8, 12], 'target': [2, 13, 16]}, '42444968': {'context': [11, 18], 'target': [6, 8, 12]}, '42444976': {'context': [20, 21], 'target': [2, 5, 19]}, '42445021': {'context': [8, 11], 'target':

In [11]:
import torch

def inspect_checkpoint(path, show_shapes=True, show_summary=True, num=2):
    """
    Load a PyTorch checkpoint file and print an overview of its contents.

    Args:
        path (str): Path of the checkpoint file (.pt/.pth/.ckpt).
        show_shapes (bool): Whether to print every key with its shape and dtype.
        show_summary (bool): Whether to print the key count and the total
            number of parameters.
        num (int): Index of the dot-separated component that is collected from
            every key ending in "weight" (e.g. num=2 picks "c" from
            "a.b.c.weight"). Keys with too few components are skipped.

    Returns:
        None. The function only prints; nothing is returned, so calling it as
        the last statement of a notebook cell does not echo the state dict.
    """
    # 1. Load the checkpoint onto the CPU.
    # NOTE(review): torch.load unpickles the file, so only open checkpoints
    # from a trusted source; consider weights_only=True if the checkpoint
    # format allows it.
    ckpt = torch.load(path, map_location="cpu")
    # If the checkpoint wraps its weights under "state_dict", unwrap them.
    state_dict = ckpt.get("state_dict", ckpt)

    # Collect the distinct module names found at depth `num` of the weight keys.
    module_names = set()
    for key in state_dict.keys():
        if not key.endswith("weight"):
            continue
        try:
            module_names.add(key.split('.')[num])
        except IndexError:
            # The key is shallower than the requested depth.
            continue
    print(module_names)

    # 2. Print every key with its shape and dtype.
    if show_shapes:
        print(f"\n>>> Loaded {len(state_dict)} keys from '{path}':\n")
        for key, value in state_dict.items():
            if torch.is_tensor(value):
                print(f"  {key:<60} shape={tuple(value.shape)} dtype={value.dtype}")
            else:
                # Non-tensor entries (counters, metadata, ...) have no shape.
                print(f"  {key:<60} (non-tensor: {type(value).__name__})")

    # 3. Summary statistics; only tensors contribute to the parameter count.
    if show_summary:
        total_params = sum(t.numel() for t in state_dict.values() if torch.is_tensor(t))
        num_layers = len(state_dict)
        print(f"\n>>> Summary:")
        print(f"    total layers/keys : {num_layers}")
        print(f"    total parameters  : {total_params:,}  ({total_params/1e6:.2f} M)\n")

# Inspect the DepthSplat checkpoint: per-key shapes are suppressed, so only the
# set of module names (at the function's default key depth) and the summary are
# printed.
inspect_checkpoint("pretrained/depthsplat-gs-small-re10k-256x256-view2-cfeab6b1.pth", show_shapes = False, show_summary = True)
# Alternative target (disabled): the Depth Anything checkpoint.
# inspect_checkpoint("pretrained/depth-anything/model_s.ckpt", show_shapes = False, show_summary = True)


{'transformer', 'regressor', 'depth_head', 'concat_projects', '0', 'scratch', 'backbone', 'upsampler', 'projects', 'pretrained', 'resize_layers', '2', 'regressor_residual'}

>>> Summary:
    total layers/keys : 567
    total parameters  : 38,346,416  (38.35 M)



  ckpt = torch.load(path, map_location="cpu")


In [12]:
# Inspect the Depth Anything checkpoint; num=1 groups the weight keys by their
# second dot-separated component instead of the default third one.
inspect_checkpoint("pretrained/depth-anything/model_s.ckpt", show_shapes = False, show_summary = True, num=1)


{'depth_head', 'pretrained'}

>>> Summary:
    total layers/keys : 265
    total parameters  : 25,083,079  (25.08 M)



  ckpt = torch.load(path, map_location="cpu")
