# Pinhole image to BEV 
![针孔图像](../data/pinhole.jpg)

针孔图像

In [None]:
import numpy as np
import cv2
from scipy.spatial.transform import Rotation as R

# Load the source pinhole image. cv2.imread does not raise on a missing or
# unreadable file, it returns None, so fail here with a clear message instead
# of letting warpPerspective crash later with an opaque OpenCV error.
img = cv2.imread("../data/pinhole.jpg")
if img is None:
    raise FileNotFoundError("cv2.imread could not read ../data/pinhole.jpg")

# Four hand-picked ground points in the source image (pixel coordinates).
src_points = np.float32([[274, 287], [462, 285], [528, 417], [197, 420]])

# Side length (pixels) of the square BEV output. It drives both the target
# corners and the warp output size so the two cannot drift apart.
bev_side = 200

# The four corners of the BEV image that the picked points must land on.
dst_points = np.float32([[0, 0], [bev_side, 0], [bev_side, bev_side], [0, bev_side]])

# M maps source-image pixels to BEV pixels.
M = cv2.getPerspectiveTransform(src_points, dst_points)
print(M)

# Sanity check: send one BEV pixel back through inv(M). Despite its name,
# p_cam holds a homogeneous source-image pixel, normalised two lines below.
p_bev = np.array([[54], [74], [1]])
p_cam = np.linalg.inv(M) @ p_bev
print(p_bev)
print(p_cam)
p_cam = p_cam[:2] / p_cam[2]
print(p_cam)

# Warp the source image into the BEV image and save it.
bev_image = cv2.warpPerspective(img, M, (bev_side, bev_side))
cv2.imwrite("../data/pinhole_bev.jpg",bev_image)



[[-1.59985900e+00 -9.26234160e-01  7.04190571e+02]
 [-4.22298082e-02 -3.96960197e+00  1.15084673e+03]
 [-1.08988746e-04 -8.57632393e-03  1.00000000e+00]]
[[54]
 [74]
 [ 1]]
[[-177.38844035]
 [-180.00173956]
 [  -0.56308657]]
[[315.02871795]
 [319.66974359]]


True

![bev图](../data/pinhole_bev.jpg)

In [None]:


# === Step 1: source image and camera intrinsics ===
# cv2.imread returns None instead of raising when the file cannot be read.
img = cv2.imread("../data/pinhole.jpg")
if img is None:
    raise FileNotFoundError("cv2.imread could not read ../data/pinhole.jpg")
K = np.array([
    [571, 0, 335.3769836425781],
    [0, 571, 235.7864227294922],
    [0,   0,   1]
])

# === Step 2: BEV image configuration ===
bev_range = [(0, 4), (-2, 2)]  # world X: 0..4 m ahead, world Y: -2..2 m sideways
resolution = 0.01  # metres per pixel (1 cm)

(Xmin, Xmax), (Ymin, Ymax) = bev_range
# R_bev (Step 4) sends the BEV camera x axis (image columns) to world -Y and
# its y axis (image rows) to world -X, so the image width spans the Y range
# and the height spans the X range. round() avoids losing a pixel when the
# float quotient lands just below an integer.
W = int(round((Ymax - Ymin) / resolution))
H = int(round((Xmax - Xmin) / resolution))

# Intrinsics of the virtual BEV camera: 1/resolution pixels per metre,
# principal point at the image centre.
fx = fy = 1.0 / resolution
cx = W / 2
cy = H / 2
K_bev = np.array([
    [fx, 0, cx],
    [0, fy, cy],
    [0,  0,  1]
])

# === Step 3: BEV pixel grid -> BEV camera coordinates ===
u, v = np.meshgrid(np.arange(W), np.arange(H))  # pixel grid
ones = np.ones_like(u)
pix_bev = np.stack([u, v, ones], axis=-1).reshape(-1, 3).T  # shape: (3, N)
coords_bev = np.linalg.inv(K_bev) @ pix_bev  # metric offsets in the BEV frame
coords_bev[2, :] = 0.0  # keep the points in the BEV frame's z = 0 plane

# === Step 4: BEV camera coordinates -> world coordinates ===
angles_deg = [-90, 0, 180]  # intrinsic ZYX order: yaw, pitch, roll
angles_rad = np.radians(angles_deg)
# Lower-case 'xyz' is scipy's extrinsic convention; fed (roll, pitch, yaw) it
# yields the same rotation as intrinsic ZYX fed (yaw, pitch, roll).
R_bev = R.from_euler('xyz', [angles_rad[2], angles_rad[1], angles_rad[0]])
R_bev_matrix = R_bev.as_matrix()
# The BEV origin (image centre) sits at the centre of the covered ground
# patch; with z = 0 above, the mapped points stay on world z = 0.
t_bev = np.array([(Xmin + Xmax) / 2, (Ymin + Ymax) / 2, 0.0])


Pw = R_bev_matrix @ coords_bev + t_bev.reshape(3, 1)  # points in the world frame

# === Step 5: world coordinates -> camera coordinates ===
RT = np.array([
    [0.00892549, -0.99987765, -0.01284575,  0.04974401],
    [-0.31300375, 0.00940711, -0.94970530,  1.00559546],
    [0.94970995, 0.01249735,  0.31288149, -0.24506292],
    [0, 0, 0, 1]
])
R_cam = RT[:3, :3]
t_cam = RT[:3, 3]
Pc = R_cam @ Pw + t_cam.reshape(3, 1)

# === Step 6: camera coordinates -> image pixel coordinates ===
Xc, Yc, Zc = Pc
valid = Zc > 0  # only points in front of the camera can be seen
# Divide only where valid so points at or behind the camera plane never
# trigger divide-by-zero warnings; the skipped slots stay NaN and are unused.
x = np.divide(Xc, Zc, out=np.full_like(Zc, np.nan), where=valid)
y = np.divide(Yc, Zc, out=np.full_like(Zc, np.nan), where=valid)
u_img = K[0, 0] * x + K[0, 2]
v_img = K[1, 1] * y + K[1, 2]

# === Step 7: build the remap lookup tables ===
# -1 marks BEV pixels with no valid source pixel (outside the image for remap).
map_x = np.full((H * W,), -1, dtype=np.float32)
map_y = np.full((H * W,), -1, dtype=np.float32)
map_x[valid] = u_img[valid]
map_y[valid] = v_img[valid]
map_x = map_x.reshape(H, W)
map_y = map_y.reshape(H, W)
mask = valid.reshape(H, W)

# === Step 8: resample the source image into the BEV image ===
bev_img = cv2.remap(img, map_x, map_y, interpolation=cv2.INTER_LINEAR)
cv2.imwrite("../data/pinhole_bev_remap.jpg", bev_img)

print(K)  
print(RT)
print(K_bev)
print(R_bev_matrix)
print(W)

[[571.           0.         335.37698364]
 [  0.         571.         235.78642273]
 [  0.           0.           1.        ]]
[[ 0.00892549 -0.99987765 -0.01284575  0.04974401]
 [-0.31300375  0.00940711 -0.9497053   1.00559546]
 [ 0.94970995  0.01249735  0.31288149 -0.24506292]
 [ 0.          0.          0.          1.        ]]
[[100.   0. 200.]
 [  0. 100. 200.]
 [  0.   0.   1.]]
[[ 2.22044605e-16 -1.00000000e+00 -1.22464680e-16]
 [-1.00000000e+00 -2.22044605e-16 -2.46519033e-32]
 [ 0.00000000e+00  1.22464680e-16 -1.00000000e+00]]
400


In [7]:
def generate_bev_remap_map(K_cam, RT_cam, K_bev, RT_bev, bev_size):
    """Build cv2.remap lookup tables that resample a camera image into a BEV image.

    Each BEV pixel is back-projected through ``K_bev``, placed on the plane
    z = 1 of the BEV camera frame, moved to the world frame with ``RT_bev``,
    moved to the source camera frame with ``RT_cam`` and finally projected
    with ``K_cam``.

    Args:
        K_cam: 3x3 intrinsic matrix of the source camera.
        RT_cam: Transform applied to world points to obtain camera points.
            Only the top-left 3x3 block (rotation) and the first three
            entries of the last column (translation) are read.
        K_bev: 3x3 intrinsic matrix of the virtual BEV camera.
        RT_bev: Transform applied to BEV-camera points to obtain world
            points; same layout as ``RT_cam``.
        bev_size: ``(height, width)`` of the BEV image in pixels.

    Returns:
        Tuple ``(map_x, map_y, mask)``. ``map_x`` and ``map_y`` are float32
        arrays of shape ``(height, width)`` holding, for every BEV pixel, the
        source-image column and row to sample; they are -1 where the point is
        not in front of the source camera. ``mask`` is a boolean array of the
        same shape that is True where the camera-frame depth is positive.
    """
    H, W = bev_size

    # Homogeneous pixel coordinates of every BEV pixel, row-major, shape (3, N).
    u, v = np.meshgrid(np.arange(W), np.arange(H))
    pix_bev = np.stack([u, v, np.ones_like(u)], axis=-1).reshape(-1, 3).T

    # Back-project to the BEV camera frame and pin the points to its z = 1 plane.
    coords_bev = np.linalg.inv(K_bev) @ pix_bev
    coords_bev[2, :] = 1.0

    # BEV camera frame -> world frame -> source camera frame.
    Pw = RT_bev[:3, :3] @ coords_bev + RT_bev[:3, 3].reshape(3, 1)
    Pc = RT_cam[:3, :3] @ Pw + RT_cam[:3, 3].reshape(3, 1)

    Xc, Yc, Zc = Pc
    valid = Zc > 0  # points at or behind the camera plane cannot be imaged

    # Project only the valid points: dividing everything by Zc first would
    # raise divide/invalid warnings and create inf/NaN for Zc <= 0.
    map_x = np.full((H * W,), -1, dtype=np.float32)
    map_y = np.full((H * W,), -1, dtype=np.float32)
    depth = Zc[valid]
    map_x[valid] = K_cam[0, 0] * (Xc[valid] / depth) + K_cam[0, 2]
    map_y[valid] = K_cam[1, 1] * (Yc[valid] / depth) + K_cam[1, 2]

    return map_x.reshape(H, W), map_y.reshape(H, W), valid.reshape(H, W)
    

In [18]:

# Load the source image. cv2.imread returns None instead of raising when the
# file cannot be read, so stop here with a clear error.
img = cv2.imread("../data/pinhole.jpg")
if img is None:
    raise FileNotFoundError("cv2.imread could not read ../data/pinhole.jpg")

# Camera intrinsics (assumed values).
cam_K = np.array([[571, 0, 335.38],
                  [0, 571, 235.79],
                  [0, 0, 1]])

# Camera extrinsics (world -> camera).
RT_cam_world = np.array([
    [0.0089, -0.9999, -0.0128, 0.05],
    [-0.3130, 0.0094, -0.9497, 1.01],
    [0.9497, 0.0125, 0.3129, -0.245],
    [0, 0, 0, 1]
])

# BEV image parameters.
bev_range = [(0, 4), (-2, 2)]  # world X and Y ranges in metres
resolution = 0.01  # metres per pixel
# The BEV rotation below sends image columns to world -Y and image rows to
# world -X, so width spans the Y range and height spans the X range. round()
# avoids losing a pixel when the float quotient lands just below an integer.
W = int(round((bev_range[1][1] - bev_range[1][0]) / resolution))
H = int(round((bev_range[0][1] - bev_range[0][0]) / resolution))
bev_size = (H, W)

# BEV intrinsics: fixed scale plus a shift of the origin to the image centre.
bev_K = np.array([[1.0 / resolution, 0, W // 2],
                  [0, 1.0 / resolution, H // 2],
                  [0, 0, 1]])

# BEV extrinsics (BEV camera -> world).
# Intrinsic ZYX rotation: z=-90, y=0, x=180 (vehicle forward points up in the image).
angles_deg = [-90, 0, 180]
angles_rad = np.radians(angles_deg)
# Lower-case 'xyz' is scipy's extrinsic convention; fed (x, y, z) angles it
# equals the intrinsic ZYX rotation described above.
R_bev = R.from_euler('xyz', [angles_rad[2], angles_rad[1], angles_rad[0]])
# Centre the virtual camera over the covered ground patch. Its height of 1 m
# matches the z = 1 plane that generate_bev_remap_map back-projects onto, so
# the mapped points land on world z = 0.
t_bev = np.array([sum(bev_range[0]) / 2, sum(bev_range[1]) / 2, 1.0])
R_bev_matrix = R_bev.as_matrix()
RT_world_bev = np.eye(4)
RT_world_bev[:3, :3] = R_bev_matrix
RT_world_bev[:3, 3] = t_bev

# Build the lookup tables.
map_x, map_y, mask = generate_bev_remap_map(cam_K, RT_cam_world, bev_K, RT_world_bev, bev_size)
print(cam_K)  
print(RT_cam_world)
print(bev_K)
print(RT_world_bev)
print(bev_size)
# Resample the source image into the BEV image and save it.
bev_img = cv2.remap(img, map_x, map_y, interpolation=cv2.INTER_LINEAR)
cv2.imwrite("../data/pinhole_bev_remap.jpg", bev_img)


[[571.     0.   335.38]
 [  0.   571.   235.79]
 [  0.     0.     1.  ]]
[[ 0.0089 -0.9999 -0.0128  0.05  ]
 [-0.313   0.0094 -0.9497  1.01  ]
 [ 0.9497  0.0125  0.3129 -0.245 ]
 [ 0.      0.      0.      1.    ]]
[[100.   0. 200.]
 [  0. 100. 200.]
 [  0.   0.   1.]]
[[ 2.22044605e-16 -1.00000000e+00 -1.22464680e-16  2.00000000e+00]
 [-1.00000000e+00 -2.22044605e-16 -2.46519033e-32  0.00000000e+00]
 [ 0.00000000e+00  1.22464680e-16 -1.00000000e+00  1.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]
(400, 400)


True

![bev图](../data/pinhole_bev_remap.jpg)

In [None]:

# Sample point expressed in the BEV camera frame (x = 1, y = 0, z = 1).
p_bev = np.array([[1.0], [0.0], [1.0]])

# Split the BEV -> world pose, then compute every derived quantity up front.
R_bev, t_bev = RT_world_bev[:3, :3], RT_world_bev[:3, 3:4]
p_world = R_bev @ p_bev + t_bev           # same point in the world frame
p_pix_homo = bev_K @ p_bev                # homogeneous BEV pixel
p_pix = p_pix_homo[:2] / p_pix_homo[2]    # normalised BEV pixel (u, v)

# Report the pose, the point in both frames, and its BEV pixel.
print(RT_world_bev)
print("BEV坐标系点:", p_bev.ravel())
print("转换到世界坐标系:", p_world.ravel())
print(p_pix_homo)
print(bev_K)
print(p_pix_homo)
print("BEV相机坐标系点:", p_bev.ravel())
print("投影到BEV像素坐标:", p_pix.ravel())



[[ 2.22044605e-16 -1.00000000e+00 -1.22464680e-16  2.00000000e+00]
 [-1.00000000e+00 -2.22044605e-16 -2.46519033e-32  0.00000000e+00]
 [ 0.00000000e+00  1.22464680e-16 -1.00000000e+00  1.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]
BEV坐标系点: [1. 0. 1.]
转换到世界坐标系: [ 2. -1.  0.]
[[300.]
 [200.]
 [  1.]]
[[100.   0. 200.]
 [  0. 100. 200.]
 [  0.   0.   1.]]
[[300.]
 [200.]
 [  1.]]
BEV相机坐标系点: [1. 0. 1.]
投影到BEV像素坐标: [300. 200.]


In [None]:

# Homogeneous BEV pixel (u = 200, v = 100) back-projected into the BEV frame.
p_pixel_bev = np.array([200.0, 100.0, 1.0]).reshape(3, 1)
p_cam_bev = np.linalg.inv(bev_K) @ p_pixel_bev
print(p_pixel_bev, p_cam_bev, sep="\n")

# BEV camera frame -> world frame.
R_bev, t_bev = RT_world_bev[:3, :3], RT_world_bev[:3, 3:4]
p_cam_world = R_bev @ p_cam_bev + t_bev
print(p_cam_world)

# World frame -> source camera frame.
R_cam, t_cam = RT_cam_world[:3, :3], RT_cam_world[:3, 3]
p_cam = R_cam @ p_cam_world + t_cam.reshape(3, 1)
print(p_cam)

# Project with the camera intrinsics, then normalise by the third component.
p_pixel_cam_homo = cam_K @ p_cam
print(p_pixel_cam_homo)
p_pixel_cam = p_pixel_cam_homo[:2] / p_pixel_cam_homo[2]
print(p_pixel_cam)

[[200.]
 [100.]
 [  1.]]
[[ 0.]
 [-1.]
 [ 1.]]
[[ 3.00000000e+00]
 [ 2.22044605e-16]
 [-2.22044605e-16]]
[[0.0767]
 [0.071 ]
 [2.6041]]
[[917.158758]
 [654.561739]
 [  2.6041  ]]
[[352.19797934]
 [251.35814254]]


In [12]:
# Fold rotation and inverse BEV intrinsics into one matrix: H_bev takes a
# homogeneous BEV pixel to the rotated BEV-frame point; adding t_bev gives the
# world point. R_bev, t_bev and p_pixel_bev come from the previous cell.
bev_K_inv = np.linalg.inv(bev_K)
H_bev = R_bev @ bev_K_inv
p_world = H_bev @ p_pixel_bev + t_bev
print("映射到 BEV 像素坐标:", p_pixel_bev)
print("世界坐标 (X=3, Y=0):", p_world)

# World -> camera pose, with the intrinsics folded into the rotation part.
R_cam, t_cam = RT_cam_world[:3, :3], RT_cam_world[:3, 3]
t_cam_col = t_cam.reshape(3, 1)
H_cam = cam_K @ R_cam
p_cam = H_cam @ p_world + cam_K @ t_cam_col   # homogeneous camera pixel
print(p_cam)
p_pixel_cam = p_cam[:2] / p_cam[2]
print(p_pixel_cam)

# The whole chain written as a single expression; must print the same vector.
p_cam = cam_K @ R_cam @ (R_bev @ bev_K_inv @ p_pixel_bev + t_bev) + cam_K @ t_cam_col
print(p_cam)


映射到 BEV 像素坐标: [[200.]
 [100.]
 [  1.]]
世界坐标 (X=3, Y=0): [[3.]
 [0.]
 [0.]]
[[917.158758]
 [654.561739]
 [  2.6041  ]]
[[352.19797934]
 [251.35814254]]
[[917.158758]
 [654.561739]
 [  2.6041  ]]


In [27]:
# Homogeneous point in the world frame at (3, 0, 0).
P_world = np.array([3.0, 0.0, 0.0, 1.0]).reshape(4, 1)

# Express it in the source camera frame and in the BEV camera frame.
RT_bev_world = np.linalg.inv(RT_world_bev)   # world -> BEV camera
P_camera = RT_cam_world @ P_world
P_bev = RT_bev_world @ P_world
print(P_camera)
print(P_bev)

# Camera -> BEV transform (camera -> world, then world -> BEV). Applying it to
# P_camera should reproduce P_bev.
RT_bev_cam = RT_bev_world @ np.linalg.inv(RT_cam_world)
P_test = RT_bev_cam @ P_camera
print(P_test)


[[0.0767]
 [0.071 ]
 [2.6041]
 [1.    ]]
[[ 2.22044605e-16]
 [-1.00000000e+00]
 [ 1.00000000e+00]
 [ 1.00000000e+00]]
[[ 2.28983499e-16]
 [-1.00000000e+00]
 [ 1.00000000e+00]
 [ 1.00000000e+00]]


In [34]:
# Homogeneous BEV pixel (u = 200, v = 100) and its back-projection through
# the inverse BEV intrinsics.
p_pixel_bev = np.array([200.0, 100.0, 1.0]).reshape(3, 1)
bev_K_inv = np.linalg.inv(bev_K)
p_cam_bev = bev_K_inv @ p_pixel_bev
print(p_pixel_bev, p_cam_bev, sep="\n")


[[200.]
 [100.]
 [  1.]]
[[ 0.]
 [-1.]
 [ 1.]]


In [36]:
# BEV camera -> source camera pose, obtained by inverting the camera -> BEV pose.
# Note: R_cam / t_cam now hold the BEV -> camera rotation and translation.
RT_cam_bev = np.linalg.inv(RT_bev_cam)
R_cam, t_cam = RT_cam_bev[:3, :3], RT_cam_bev[:3, 3:4]

# Move the back-projected BEV point straight into the source camera frame.
p_cam_cam = R_cam @ p_cam_bev + t_cam
print(p_cam_cam)

[[0.0767]
 [0.071 ]
 [2.6041]]


In [39]:
# Homogeneous camera pixel from the point already expressed in the camera frame.
p_pixel_cam = cam_K @ p_cam_cam

# Same result straight from the BEV pixel: rotation part plus translation part.
rot_term = cam_K @ R_cam @ np.linalg.inv(bev_K) @ p_pixel_bev
trans_term = cam_K @ t_cam
p_pixel_cam_test = rot_term + trans_term

print(p_pixel_cam, p_pixel_cam_test, sep="\n")


[[917.158758]
 [654.561739]
 [  2.6041  ]]
[[917.158758]
 [654.561739]
 [  2.6041  ]]


In [52]:
# 4. Plane parameters, expressed in the BEV camera frame.
n = np.array([[0], [0], [-1]])  # ground-plane normal
d = 1.0  # distance between the ground and the BEV camera (metres)

# BEV camera -> source camera pose.
RT_cam_bev = np.linalg.inv(RT_bev_cam)
R_cam = RT_cam_bev[:3, :3]
t_cam = RT_cam_bev[:3, 3].reshape(3, 1)

# Plane-induced homography from BEV pixels to source-camera pixels. Ground
# points have z = 1 in the BEV frame, i.e. -(n^T X) / d = 1, so
# R X + t = (R - t n^T / d) X for every point on the plane.
H = cam_K @ (R_cam - (t_cam @ n.T) / d) @ np.linalg.inv(bev_K)
H_inv = np.linalg.inv(H)

# Check one BEV pixel against the step-by-step projection from earlier cells.
p_test = H@p_pixel_bev
print(p_test)
p_test = p_test[:2] / p_test[2]
print(p_test)

# warpPerspective expects the source -> destination matrix, hence H_inv
# (camera pixels -> BEV pixels). The output size is taken from bev_size, which
# is (height, width), so it stays in sync with bev_K; OpenCV wants (width, height).
bev_rows, bev_cols = bev_size
bev_image = cv2.warpPerspective(img, H_inv, (bev_cols, bev_rows))
cv2.imwrite("../data/pinhole_bev_homography.jpg",bev_image)

[[917.158758]
 [654.561739]
 [  2.6041  ]]
[[352.19797934]
 [251.35814254]]


True

# Fisheye image to BEV