diff --git a/src/viam/sdk/components/camera.cpp b/src/viam/sdk/components/camera.cpp
index 78a3accf4..7eb0d88ad 100644
--- a/src/viam/sdk/components/camera.cpp
+++ b/src/viam/sdk/components/camera.cpp
@@ -7,6 +7,7 @@
 #include
 #include
 
+#include
 #include
 #include
 
@@ -24,6 +25,118 @@ API API::traits::api() {
     return {kRDK, kComponent, "camera"};
 }
 
+namespace {
+// UTF-8 encoding of 'DEPTHMAP' used in the header of FORMAT_RAW_DEPTH bytes payload.
+const uint64_t k_magic_number = 0x44455054484D4150ULL;
+
+// Number of bytes of the header for FORMAT_RAW_DEPTH payloads: the magic number, the width and
+// the height, each written as a 64-bit integer.
+const auto k_header_size = sizeof(uint64_t) * 3;
+
+// Writes an int of type T in big-endian format into a byte vector at the given offset and updates
+// the offset. The vector is not grown, so it must already be large enough to hold the value.
+template <typename T>
+void append_big_endian(std::vector<unsigned char>& data, size_t* offset, T value) {
+    // Compare against the space that is left so that the check itself cannot wrap around.
+    if (*offset > data.size() || data.size() - *offset < sizeof(T)) {
+        throw Exception("Incorrect data size: attempted to write beyond data bounds");
+    }
+    const T value_be = boost::endian::native_to_big(value);
+    std::memcpy(&data[*offset], &value_be, sizeof(T));
+    *offset += sizeof(T);
+}
+
+// Reads an int of type T from data in big-endian format and updates the offset.
+// Intended to be used in a sequential manner.
+template <typename T>
+T read_big_endian(const std::vector<unsigned char>& data, size_t* offset) {
+    // Compare against the bytes that are left so that the check itself cannot wrap around.
+    if (*offset > data.size() || data.size() - *offset < sizeof(T)) {
+        throw Exception("Attempted to read beyond data bounds.");
+    }
+
+    T value;
+    std::memcpy(&value, &data[*offset], sizeof(T));
+    *offset += sizeof(T);
+
+    return boost::endian::big_to_native(value);
+}
+
+}  // namespace
+
+std::vector<unsigned char> Camera::encode_depth_map(const Camera::depth_map& m) {
+    const auto& shape = m.shape();
+    if (shape.size() != 2) {
+        throw Exception("Depth map is not a 2D array.");
+    }
+
+    const size_t height = shape[0];
+    const size_t width = shape[1];
+    const size_t total_byte_count = k_header_size + height * width * sizeof(uint16_t);
+    std::vector<unsigned char> data(total_byte_count);
+    size_t offset = 0;
+
+    // Network data is stored in big-endian, while most host systems are little endian.
+    // The header fields are 64 bits wide in the payload whatever the width of size_t on the
+    // host, so widen them explicitly instead of letting the template deduce size_t.
+    append_big_endian(data, &offset, k_magic_number);
+    append_big_endian(data, &offset, static_cast<uint64_t>(width));
+    append_big_endian(data, &offset, static_cast<uint64_t>(height));
+
+    for (size_t i = 0; i < height; ++i) {
+        for (size_t j = 0; j < width; ++j) {
+            append_big_endian(data, &offset, m(i, j));
+        }
+    }
+
+    return data;
+}
+
+Camera::depth_map Camera::decode_depth_map(const std::vector<unsigned char>& data) {
+    if (data.size() < k_header_size) {
+        throw Exception("Data too short to contain valid depth information. Size: " +
+                        std::to_string(data.size()));
+    }
+
+    size_t offset = 0;
+    const uint64_t magic_number = read_big_endian<uint64_t>(data, &offset);
+    if (magic_number != k_magic_number) {
+        throw Exception(
+            "Invalid header for a vnd.viam.dep encoded depth image. The data may be corrupted, or "
+            "is not a Viam-encoded depth map.");
+    }
+
+    const uint64_t width = read_big_endian<uint64_t>(data, &offset);
+    const uint64_t height = read_big_endian<uint64_t>(data, &offset);
+
+    // width and height are read from the payload, so their product may not fit in 64 bits. Bound
+    // it by division against the number of depth values the payload can hold before multiplying;
+    // otherwise a wrapped product could pass the size check below with bogus dimensions.
+    const uint64_t max_pixel_count = (data.size() - k_header_size) / sizeof(uint16_t);
+    if (height != 0 && width > max_pixel_count / height) {
+        throw Exception("Depth map dimensions exceed the size of the data. Actual size: " +
+                        std::to_string(data.size()) + ". Width: " + std::to_string(width) +
+                        " Height: " + std::to_string(height));
+    }
+
+    const uint64_t pixel_count = width * height;
+    const uint64_t expected_size = k_header_size + pixel_count * sizeof(uint16_t);
+    if (data.size() != expected_size) {
+        throw Exception("Data size does not match width, height, and depth values. Actual size: " +
+                        std::to_string(data.size()) +
+                        ". Expected size: " + std::to_string(expected_size) +
+                        ". Width: " + std::to_string(width) + " Height: " + std::to_string(height));
+    }
+
+    // Decode straight into the result instead of staging the values in a temporary vector. The
+    // elements are visited in the array's begin()/end() order, as a copy into m.begin() would.
+    xt::xarray<uint16_t> m = xt::xarray<uint16_t>::from_shape({height, width});
+    for (auto& depth : m) {
+        depth = read_big_endian<uint16_t>(data, &offset);
+    }
+    return m;
+}
+
 std::string Camera::normalize_mime_type(const std::string& str) {
     std::string mime_type = str;
     if (str.size() >= Camera::lazy_suffix.size() &&
diff --git a/src/viam/sdk/components/camera.hpp b/src/viam/sdk/components/camera.hpp
index 912a3d917..c40582429 100644
--- a/src/viam/sdk/components/camera.hpp
+++ b/src/viam/sdk/components/camera.hpp
@@ -8,6 +8,9 @@
 #include
 #include
 
+#include
+#include
+
 #include
 #include
 
@@ -83,6 +86,42 @@ class Camera : public Component {
         response_metadata metadata;
     };
 
+    /// @brief A depth image stored as a 2D xtensor array of 16-bit unsigned integers.
+    ///
+    /// The first axis is height and the second axis is width, so the dimensions of the depth map
+    /// are given by the shape of the array. Each depth value represents the distance from the
+    /// camera to a point in the scene.
+    using depth_map = xt::xarray<uint16_t>;
+
+    /// Encodes the dimensions and depth values of a depth map into a raw binary format
+    /// (MIME type FORMAT_RAW_DEPTH).
+    ///
+    /// This function takes a depth_map, and encodes this information into a binary blob. The binary
+    /// format consists of "magic number" header (UTF-8 encoding for 'DEPTHMAP' in big-endian),
+    /// then the width and height encoded as 64-bit unsigned integers, followed by the depth
+    /// values encoded as 16-bit unsigned integers (big-endian format). This format is suitable
+    /// for serialization and transmission of depth map data through gRPC.
+    ///
+    /// @param m The depth map to encode; must be a 2D array indexed as (row, column).
+    /// @return A std::vector<unsigned char> representing the encoded binary data of the depth
+    /// map.
+    /// The vector includes 8 bytes for the magic number, 8 bytes for width, 8 bytes for height,
+    /// followed by 2 bytes per depth value, written row by row.
+    /// @throws Exception: if the depth map is not 2D
+    ///
+    static std::vector<unsigned char> encode_depth_map(const Camera::depth_map& m);
+
+    /// Decode image data of custom MIME type FORMAT_RAW_DEPTH into a depth_map structure.
+    ///
+    /// This function processes a binary blob representing a depth map in a specific
+    /// format and extracts the dimensions and depth values contained within.
+    ///
+    /// @param data A vector of unsigned chars representing the binary data of the depth map.
+    /// @return A depth_map (type alias for a 2D xtensor array)
+    /// @throws Exception: if the data is misformatted e.g. doesn't contain valid depth information,
+    /// or if the data size does not match the expected size based on the width and height.
+    static Camera::depth_map decode_depth_map(const std::vector<unsigned char>& data);
+
     /// @brief remove any extra suffix's from the mime type string.
     static std::string normalize_mime_type(const std::string& str);
 
diff --git a/src/viam/sdk/tests/test_camera.cpp b/src/viam/sdk/tests/test_camera.cpp
index 4187b1ef0..84fb593be 100644
--- a/src/viam/sdk/tests/test_camera.cpp
+++ b/src/viam/sdk/tests/test_camera.cpp
@@ -95,6 +95,50 @@ BOOST_AUTO_TEST_CASE(test_do_command) {
     });
 }
 
+BOOST_AUTO_TEST_CASE(test_depth_map_encode_decode) {
+    xt::xarray<uint16_t> depth_map =
+        xt::xarray<uint16_t>::from_shape({3, 2});  // height = 3, width = 2
+    depth_map(0, 0) = 100;
+    depth_map(0, 1) = 200;
+    depth_map(1, 0) = 300;
+    depth_map(1, 1) = 400;
+    depth_map(2, 0) = 500;
+    depth_map(2, 1) = 600;
+
+    std::vector<unsigned char> data = Camera::encode_depth_map(depth_map);
+    auto result_map = Camera::decode_depth_map(data);
+
+    // Check if the dimensions and values match
+    BOOST_CHECK_EQUAL(result_map.shape()[0], 3);  // height
+    BOOST_CHECK_EQUAL(result_map.shape()[1], 2);  // width
+
+    std::vector<uint16_t> expected_values = {100, 200, 300, 400, 500, 600};
+    std::vector<uint16_t> result_values(result_map.begin(), result_map.end());
+
+    BOOST_CHECK_EQUAL_COLLECTIONS(
+        result_values.begin(), result_values.end(), expected_values.begin(), expected_values.end());
+}
+
+BOOST_AUTO_TEST_CASE(test_depth_map_decode_rejects_malformed_data) {
+    const xt::xarray<uint16_t> depth_map = {{100, 200}, {300, 400}, {500, 600}};
+    const std::vector<unsigned char> valid = Camera::encode_depth_map(depth_map);
+
+    // NOTE(review): std::exception is used on the assumption that the SDK's Exception derives
+    // from it -- confirm, or name the SDK exception type directly.
+    // A payload that lost its last byte no longer matches the dimensions in its header.
+    std::vector<unsigned char> truncated = valid;
+    truncated.pop_back();
+    BOOST_CHECK_THROW(Camera::decode_depth_map(truncated), std::exception);
+
+    // Header-only payload claiming width = 2^63 and height = 2. The byte count implied by those
+    // dimensions wraps around to zero, which must not be mistaken for an empty depth map.
+    std::vector<unsigned char> overflowing(valid.begin(), valid.begin() + 8);  // magic number
+    overflowing.resize(24, 0);
+    overflowing[8] = 0x80;   // most significant byte of width
+    overflowing[23] = 0x02;  // least significant byte of height
+    BOOST_CHECK_THROW(Camera::decode_depth_map(overflowing), std::exception);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
 
 }  // namespace sdktests