In [1]:
from __future__ import absolute_import, division, print_function
%matplotlib inline

import os
import numpy as np
import PIL.Image as pil
import matplotlib.pyplot as plt


import torch
from torchvision import transforms

import networks
from utils import download_model_if_doesnt_exist
from tqdm import tqdm

  warn(


本实验使用 monodepth2 模型，加载的预训练权重为 model_name = "mono_1024x320"。

In [2]:
# Name of the pretrained monodepth2 weight set used in this experiment
model_name = "mono_1024x320"

# Helper imported from utils; by its name it is expected to fetch the weights
# when they are not already on disk.
download_model_if_doesnt_exist(model_name)
encoder_path = os.path.join("models", model_name, "encoder.pth")
depth_decoder_path = os.path.join("models", model_name, "depth.pth")

# LOADING PRETRAINED MODEL
encoder = networks.ResnetEncoder(18, False)
depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=range(4))

# NOTE(review): torch.load unpickles the checkpoint file; only load weights
# obtained from a trusted source.
loaded_dict_enc = torch.load(encoder_path, map_location='cpu')

# Build the encoder's state dict once instead of once per checkpoint key.
encoder_state = encoder.state_dict()
# Keep only the entries the encoder itself defines; the checkpoint also holds
# other entries (e.g. 'height' / 'width', read later when resizing inputs).
filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder_state}
encoder.load_state_dict(filtered_dict_enc)

loaded_dict = torch.load(depth_decoder_path, map_location='cpu')
depth_decoder.load_state_dict(loaded_dict)

# Switch both networks to inference mode; the trailing ';' suppresses the
# module repr that the notebook would otherwise print.
encoder.eval()
depth_decoder.eval();



source_folder 是待进行深度估计的图片所在文件夹，save_folder 是输出视差图（.npy 文件）的保存文件夹。

In [3]:
# Source folder: images to run depth estimation on
source_folder = r"E:\location_test\test718"
# Target folder: one disparity map (.npy) is written here per input image
save_folder = r"E:\location_test\test718_depth"

# makedirs(exist_ok=True) creates missing parent folders as well and does not
# fail when the folder already exists (no separate, racy existence check).
os.makedirs(save_folder, exist_ok=True)

# Loop-invariant values: the network input size stored with the encoder
# checkpoint, and the PIL -> tensor converter.
feed_height = loaded_dict_enc['height']
feed_width = loaded_dict_enc['width']
to_tensor = transforms.ToTensor()

for imgName in tqdm(os.listdir(source_folder)):
    # Only files with a (case-sensitive) ".jpg" suffix are processed
    if not imgName.endswith(".jpg"):
        continue

    image_path = os.path.join(source_folder, imgName)
    # convert() returns an independent image, so the file can be closed here
    with pil.open(image_path) as image_file:
        input_image = image_file.convert('RGB')
    original_width, original_height = input_image.size

    # Resize to the size the network expects and add a batch dimension
    input_image_resized = input_image.resize((feed_width, feed_height), pil.LANCZOS)
    input_image_pytorch = to_tensor(input_image_resized).unsqueeze(0)

    with torch.no_grad():
        features = encoder(input_image_pytorch)
        outputs = depth_decoder(features)
    disp = outputs[("disp", 0)]

    # Bring the disparity map back to the original image resolution so that
    # pixel coordinates of the source image index it directly.
    disp_resized = torch.nn.functional.interpolate(
        disp, (original_height, original_width), mode="bilinear", align_corners=False)

    # Save the raw resized disparity values (not a colormapped image)
    disp_resized_np = disp_resized.squeeze().cpu().numpy()
    # Same ".jpg" -> ".npy" naming rule that the depth lookup cell uses to
    # locate the map for a given image name.
    save_name = imgName.replace(".jpg", ".npy")
    save_path = os.path.join(save_folder, save_name)
    np.save(save_path, disp_resized_np)

100%|██████████| 182/182 [02:57<00:00,  1.02it/s]


In [4]:
import csv
import numpy as np
import os

# Input CSV file; the reading loop below uses its "name", "bottom_x" and "bottom_y" columns
csv_file = r"E:\location_test\test718_detected.csv" # path to your CSV file
# Folder holding the disparity maps (.npy) produced by the depth-estimation step
depth_folder = r"E:\location_test\test718_depth"
# Previously computed baseline and focal_length values.
# NOTE(review): how these were derived and their units are not shown here — confirm.
baseline = 1.16418045821547
focal_length = 0.9398960573839849
# List collecting one (image_name, bottom_x, bottom_y, depth) tuple per CSV row
results = []

def calculate_depth_from_disparity(disparity_map, pixel_x, pixel_y, baseline, focal_length):
    """Convert the disparity at one pixel into a depth value.

    The depth is computed as ``(baseline * focal_length) / disparity``.

    Args:
        disparity_map: Array indexed as ``disparity_map[pixel_y, pixel_x]``
            (row first, then column).
        pixel_x: Column index of the pixel; must be non-negative.
        pixel_y: Row index of the pixel; must be non-negative.
        baseline: Baseline factor of the depth formula.
        focal_length: Focal-length factor of the depth formula.

    Returns:
        The depth as a Python float; ``inf`` when the disparity is exactly zero.

    Raises:
        IndexError: If a coordinate is negative or lies outside the map.
    """
    # Negative indices would silently wrap around to the opposite image edge
    # and return the disparity of an unrelated pixel, so reject them.
    if pixel_x < 0 or pixel_y < 0:
        raise IndexError(
            f"pixel coordinates must be non-negative, got ({pixel_x}, {pixel_y})"
        )

    # Convert to a Python float so the result does not depend on the NumPy
    # version's scalar promotion rules.
    disparity_value = float(disparity_map[pixel_y, pixel_x])

    # Zero disparity corresponds to a point infinitely far away; return inf
    # explicitly instead of relying on a warning-emitting division by zero.
    if disparity_value == 0.0:
        return float("inf")

    return (baseline * focal_length) / disparity_value

# Walk through the detections CSV and look up the depth of every listed point
with open(csv_file, 'r', newline='', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        image_name = row["name"]
        bottom_x, bottom_y = int(row["bottom_x"]), int(row["bottom_y"])

        # The disparity map of an image is stored under the same name with
        # ".jpg" replaced by ".npy".
        disparity_map = np.load(
            os.path.join(depth_folder, image_name.replace(".jpg", ".npy"))
        )

        # Record the point together with its computed depth
        results.append((
            image_name,
            bottom_x,
            bottom_y,
            calculate_depth_from_disparity(
                disparity_map, bottom_x, bottom_y, baseline, focal_length
            ),
        ))

# Report every collected result
for image_name, bottom_x, bottom_y, depth in results:
    print(f"图像名: {image_name}, 对应路灯底部坐标: {bottom_x, bottom_y}的深度是: {depth}米")


图像名: 000000_22.23747811_114.15322674_202307_8_267.jpg, 对应路灯底部坐标: (1767, 543)的深度是: 8.75017041931811米
图像名: 000000_22.23747811_114.15322674_202307_8_267.jpg, 对应路灯底部坐标: (1300, 694)的深度是: 4.805069285566414米
图像名: 000001_22.23931270_114.15336254_202307_9_93.jpg, 对应路灯底部坐标: (1095, 714)的深度是: 4.75892498330491米
图像名: 000013_22.23741139_114.15319390_202307_8_217.jpg, 对应路灯底部坐标: (1460, 855)的深度是: 2.9885498962118815米
图像名: 000013_22.23741139_114.15319390_202307_8_217.jpg, 对应路灯底部坐标: (1252, 575)的深度是: 10.573987379435831米
图像名: 000014_22.23931590_114.15346578_202307_9_90.jpg, 对应路灯底部坐标: (1908, 601)的深度是: 6.337052055083257米
图像名: 000023_22.23930698_114.15355668_202307_9_80.jpg, 对应路灯底部坐标: (1976, 621)的深度是: 7.903249405563733米
图像名: 000030_22.23925878_114.15368894_202307_9_57.jpg, 对应路灯底部坐标: (1328, 623)的深度是: 5.779897705481946米
图像名: 000036_22.23916300_114.15378963_202307_9_38.jpg, 对应路灯底部坐标: (1993, 628)的深度是: 7.300029371908782米
图像名: 000037_22.24561553_114.15984044_202307_25_315.jpg, 对应路灯底部坐标: (1680, 575)的深度是: 9.12549734365

In [7]:
import csv

# Path of the CSV file that receives the depth column (rewritten in place)
csv_input_file = r"E:\location_test\test718_detected.csv"

# Read the existing CSV content. Blank rows are dropped because csv.DictReader,
# which produced `results`, skips them too; keeping them would shift every
# following depth onto the wrong row.
with open(csv_input_file, 'r', newline='', encoding='utf-8') as file:
    data = [row for row in csv.reader(file) if row]

if not data:
    raise ValueError(f"{csv_input_file} is empty; there is no header row to extend")

header, body = data[0], data[1:]

# Refuse to write when rows and results cannot be paired one-to-one
if len(body) != len(results):
    raise ValueError(
        f"{csv_input_file} has {len(body)} data rows but {len(results)} depth "
        "results were computed; re-run the depth computation before writing"
    )

if "depth" in header:
    # The column exists already (this cell was run before): overwrite it
    # instead of appending a second "depth" column.
    depth_col = header.index("depth")
    for row, result in zip(body, results):
        # Pad short rows so that the depth lands under its header
        row.extend([""] * (depth_col + 1 - len(row)))
        row[depth_col] = result[3]
else:
    # First run: add the new column title and one depth value per row
    header.append("depth")
    for row, result in zip(body, results):
        row.append(result[3])

# Write the merged data back to the original CSV file
with open(csv_input_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerows(data)