In [None]:
import cv2
import torch
import numpy as np
import open3d as o3d
from torchvision.transforms import Compose, Resize, ToTensor, Normalize

# Load MiDaS model for depth estimation
def load_midas_model(model_type="DPT_Large"):
    """Load a MiDaS depth-estimation model together with its input transform.

    Both the network and the preprocessing transforms are fetched through
    ``torch.hub`` from the ``intel-isl/MiDaS`` repository.

    Args:
        model_type: Name of the MiDaS hub entry point to load. Defaults to
            ``"DPT_Large"``. ``"DPT_Large"`` and ``"DPT_Hybrid"`` are paired
            with ``dpt_transform``; every other value is paired with
            ``small_transform``.

    Returns:
        A ``(model, transform, device)`` tuple: the model switched to eval
        mode and moved to ``device``, the preprocessing callable, and the
        selected ``torch.device`` (CUDA when available, otherwise CPU).
    """
    model = torch.hub.load("intel-isl/MiDaS", model_type)
    model.eval()

    # Use GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # The preprocessing transform has to match the architecture family.
    midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
    if model_type in ("DPT_Large", "DPT_Hybrid"):
        transform = midas_transforms.dpt_transform
    else:
        transform = midas_transforms.small_transform

    return model, transform, device

# Predict depth from a 2D image
def predict_depth(image_path, model, transform, device):
    """Run the depth model on one image file.

    Args:
        image_path: Path of the image to read with OpenCV.
        model: Callable network applied to the preprocessed batch.
        transform: Preprocessing callable applied to the RGB image; its
            result must support ``.to(device)``.
        device: Device the input batch is moved to before inference.

    Returns:
        ``(img, depth_map)`` where ``img`` is the image converted to RGB and
        ``depth_map`` is the model prediction resized (bicubic) to the
        image's height and width, as a NumPy array.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        raise FileNotFoundError(f"Image not found or unable to load: {image_path}")

    # OpenCV decodes to BGR; the rest of the pipeline works in RGB.
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    batch = transform(rgb).to(device)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        raw_prediction = model(batch)

    # Bring the prediction back to the source resolution.
    target_size = rgb.shape[:2]
    resized = torch.nn.functional.interpolate(
        raw_prediction.unsqueeze(1),
        size=target_size,
        mode="bicubic",
        align_corners=False,
    )
    depth_map = resized.squeeze().cpu().numpy()

    return rgb, depth_map

# Create a 3D point cloud from the depth map
def create_point_cloud(img, depth_map):
    """Back-project a depth map into a coloured Open3D point cloud.

    Every depth pixel ``(v, u)`` becomes one 3-D point using a pinhole model
    whose principal point is the image centre ``(width / 2, height / 2)`` and
    whose focal length is approximated as ``0.8 * width`` on both axes.
    The whole grid is computed with vectorised NumPy operations instead of a
    per-pixel Python loop.

    Args:
        img: ``H x W x 3`` colour image with channel values in 0-255. Pixel
            ``img[v, u]`` colours the point built from ``depth_map[v, u]``.
            Must have at least as many rows and columns as ``depth_map``.
        depth_map: 2-D array holding the z value of every pixel.

    Returns:
        An ``o3d.geometry.PointCloud`` with one point per depth pixel, in
        row-major (row by row) order, and colours scaled to ``[0, 1]``.

    Raises:
        IndexError: If ``img`` has fewer rows or columns than ``depth_map``.
    """
    # NOTE(review): when depth_map comes from MiDaS it is presumably relative
    # inverse depth rather than metric depth - confirm before trusting scale.
    depth = np.asarray(depth_map, dtype=np.float64)
    height, width = depth.shape
    focal_length = 0.8 * width  # Approximate focal length (can be adjusted)

    pixels = np.asarray(img)
    if pixels.shape[0] < height or pixels.shape[1] < width:
        # Same exception type that indexing past the image edge would raise.
        raise IndexError(
            f"image of shape {pixels.shape[:2]} is smaller than "
            f"depth map of shape {(height, width)}"
        )

    # Pixel offsets from the principal point; broadcasting (H, 1) against
    # (W,) covers the full grid without materialising index arrays.
    u_offset = np.arange(width, dtype=np.float64) - width / 2
    v_offset = (np.arange(height, dtype=np.float64) - height / 2)[:, np.newaxis]

    points = np.empty((height, width, 3), dtype=np.float64)
    points[..., 0] = u_offset * depth / focal_length
    points[..., 1] = v_offset * depth / focal_length
    points[..., 2] = depth

    # Normalize color values; the crop keeps colours aligned with the points.
    colors = pixels[:height, :width].reshape(-1, 3) / 255.0

    # Convert to Open3D point cloud
    point_cloud = o3d.geometry.PointCloud()
    point_cloud.points = o3d.utility.Vector3dVector(points.reshape(-1, 3))
    point_cloud.colors = o3d.utility.Vector3dVector(colors)

    return point_cloud

# Downsample a point cloud
def downsample_point_cloud(point_cloud, voxel_size=0.01):
    """Return a voxel-downsampled version of ``point_cloud``.

    Args:
        point_cloud: Object exposing ``voxel_down_sample`` (an Open3D point
            cloud in this pipeline).
        voxel_size: Voxel size forwarded to ``voxel_down_sample``.

    Returns:
        Whatever ``point_cloud.voxel_down_sample(voxel_size)`` returns.
    """
    reduced_cloud = point_cloud.voxel_down_sample(voxel_size)
    return reduced_cloud

# Align two point clouds using ICP
def align_point_clouds(source, target, threshold=0.05, max_iteration=50):
    """Estimate the rigid transform that aligns ``source`` onto ``target``.

    Runs Open3D point-to-point ICP starting from the identity transform and
    prints a message before and after the registration.

    Args:
        source: Point cloud to be moved.
        target: Point cloud to align against.
        threshold: Distance threshold passed to ``registration_icp`` as its
            third positional argument.
        max_iteration: Iteration cap given to ``ICPConvergenceCriteria``.

    Returns:
        The ``transformation`` attribute of the ICP registration result.
    """
    print("Starting ICP alignment...")

    registration = o3d.pipelines.registration
    icp_result = registration.registration_icp(
        source,
        target,
        threshold,
        np.identity(4),  # initial guess: no prior alignment
        registration.TransformationEstimationPointToPoint(),
        registration.ICPConvergenceCriteria(max_iteration=max_iteration),
    )

    print("ICP alignment completed.")
    return icp_result.transformation

# Combine multiple point clouds into a single point cloud
def combine_point_clouds(point_clouds):
    """Merge several point clouds into one, aligning each with ICP.

    The first cloud is the reference. Every later cloud is registered
    against the merged result so far (via ``align_point_clouds``), moved by
    the estimated transform and appended.

    The input clouds are left untouched: the reference and every aligned
    cloud are copies, so neither the in-place ``+=`` nor ``transform``
    alters objects owned by the caller.

    Args:
        point_clouds: Non-empty sequence of ``o3d.geometry.PointCloud``.

    Returns:
        A new ``o3d.geometry.PointCloud`` holding all aligned points.

    Raises:
        IndexError: If ``point_clouds`` is empty.
    """
    if not point_clouds:
        raise IndexError("combine_point_clouds() needs at least one point cloud")

    # Copy so that accumulating with += does not grow the caller's first cloud.
    combined_point_cloud = o3d.geometry.PointCloud(point_clouds[0])

    # Iteratively align and add the remaining point clouds
    for i, cloud in enumerate(point_clouds[1:], start=1):
        print(f"Aligning point cloud {i}...")
        transformation = align_point_clouds(cloud, combined_point_cloud)

        # transform() works in place, so apply it to a copy of the input.
        aligned = o3d.geometry.PointCloud(cloud)
        aligned.transform(transformation)

        combined_point_cloud += aligned

    return combined_point_cloud

# Main function
def main(image_paths=None, output_path="output_combined_point_cloud.ply"):
    """Build, clean, save and display a combined point cloud from images.

    For every image a depth map is predicted, converted to a point cloud and
    downsampled. The clouds are then merged with ICP alignment, statistical
    outliers are removed, and the result is written to ``output_path`` and
    shown in the Open3D viewer.

    Args:
        image_paths: Iterable of image file paths. When ``None`` the sample
            paths ``/content/1.jpg`` .. ``/content/3.jpg`` are used.
        output_path: Destination file for the combined point cloud.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    if image_paths is None:
        # List of image paths (replace with your image paths)
        image_paths = [
            "/content/1.jpg",
            "/content/2.jpg",
            "/content/3.jpg",
            # Add more image paths as needed
        ]

    # Load MiDaS model
    model, transform, device = load_midas_model()

    point_clouds = []
    for image_path in image_paths:
        # Best effort per image: one unreadable or failing image is reported
        # and skipped so the remaining images are still processed.
        try:
            print(f"Processing image: {image_path}")
            img, depth_map = predict_depth(image_path, model, transform, device)
            point_cloud = create_point_cloud(img, depth_map)
            point_cloud = downsample_point_cloud(point_cloud)  # Downsample the point cloud
            point_clouds.append(point_cloud)
        except Exception as e:
            print(f"Error processing {image_path}: {e}")

    if not point_clouds:
        print("No valid point clouds generated. Exiting.")
        return

    # Combine point clouds with ICP-based alignment
    combined_point_cloud = combine_point_clouds(point_clouds)

    # Remove statistical outliers
    combined_point_cloud, _ = combined_point_cloud.remove_statistical_outlier(
        nb_neighbors=20, std_ratio=2.0
    )

    # Save before opening the viewer so the result is on disk even if the
    # visualisation is unavailable or interrupted. write_point_cloud reports
    # failure through its boolean result, so only claim success when it is True.
    if o3d.io.write_point_cloud(output_path, combined_point_cloud):
        print(f"Combined point cloud saved as {output_path}")
    else:
        print(f"Failed to save combined point cloud to {output_path}")

    # Visualize the combined point cloud
    o3d.visualization.draw_geometries([combined_point_cloud])

# Run the pipeline only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master
Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


Processing image: /content/1.jpg
Processing image: /content/2.jpg
Processing image: /content/3.jpg
Processing image: /content/4.jpg
Processing image: /content/5.jpg
Aligning point cloud 1...
Starting ICP alignment...
ICP alignment completed.
Aligning point cloud 2...
Starting ICP alignment...
ICP alignment completed.
Aligning point cloud 3...
Starting ICP alignment...
ICP alignment completed.
Aligning point cloud 4...
Starting ICP alignment...
ICP alignment completed.
Combined point cloud saved as output_combined_point_cloud.ply


In [None]:
pip install torch torchvision opencv-python open3d

