Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions src/viam/sdk/components/camera.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <viam/api/component/camera/v1/camera.grpc.pb.h>
#include <viam/api/component/camera/v1/camera.pb.h>

#include <viam/sdk/common/exception.hpp>
#include <viam/sdk/common/utils.hpp>
#include <viam/sdk/resource/resource.hpp>

Expand All @@ -24,6 +25,104 @@ API API::traits<Camera>::api() {
return {kRDK, kComponent, "camera"};
}

namespace {
// Magic number that opens every FORMAT_RAW_DEPTH payload: read as a big-endian 64-bit
// integer, its eight bytes are the UTF-8 characters 'DEPTHMAP'.
constexpr uint64_t k_magic_number = 0x44455054484D4150ULL;

// Size in bytes of the FORMAT_RAW_DEPTH header, which is made of three 64-bit fields
// (magic number, width, height).
constexpr size_t k_header_size = 3 * sizeof(uint64_t);

// Writes `value` (an integer of type T) into `data` at position `*offset` using big-endian
// byte order, then advances `*offset` past the bytes just written.
//
// `data` must already be large enough: nothing is resized here.
// Throws Exception if the write would run past the end of `data`.
template <typename T>
void append_big_endian(std::vector<unsigned char>& data, size_t* offset, T value) {
    const size_t end_of_write = *offset + sizeof(T);
    if (end_of_write > data.size()) {
        throw Exception("Incorrect data size: attempted to write beyond data bounds");
    }

    // Convert first, then copy the raw bytes; memcpy avoids any alignment assumptions
    // about the destination buffer.
    const T big_endian_value = boost::endian::native_to_big(value);
    std::memcpy(data.data() + *offset, &big_endian_value, sizeof(T));
    *offset = end_of_write;
}

// Extracts an integer of type T stored in big-endian byte order at position `*offset` of
// `data`, converts it to host byte order, and advances `*offset` past the bytes consumed.
// Designed for sequential parsing: call repeatedly with the same offset pointer.
//
// Throws std::runtime_error if fewer than sizeof(T) bytes remain at `*offset`.
// NOTE(review): append_big_endian reports the equivalent condition with Exception rather
// than std::runtime_error -- confirm whether the two should agree.
template <typename T>
T read_big_endian(const std::vector<unsigned char>& data, size_t* offset) {
    const size_t end_of_read = *offset + sizeof(T);
    if (end_of_read > data.size()) {
        throw std::runtime_error("Attempted to read beyond data bounds.");
    }

    // memcpy into a local avoids any alignment assumptions about the source buffer.
    T raw;
    std::memcpy(&raw, data.data() + *offset, sizeof(T));
    *offset = end_of_read;

    return boost::endian::big_to_native(raw);
}

} // namespace

// Serializes a 2D depth map into the FORMAT_RAW_DEPTH wire layout:
//   [magic number : 8 bytes][width : 8 bytes][height : 8 bytes][depth values : 2 bytes each]
// with every field in big-endian byte order and depth values written row by row.
//
// @param m The depth map to encode; axis 0 is height and axis 1 is width.
// @return The encoded bytes (header plus height * width depth values).
// @throws Exception if `m` is not a 2D array.
std::vector<unsigned char> Camera::encode_depth_map(const Camera::depth_map& m) {
    const auto& shape = m.shape();
    if (shape.size() != 2) {
        throw Exception("Depth map is not a 2D array.");
    }

    const size_t height = shape[0];
    const size_t width = shape[1];
    const size_t total_byte_count = k_header_size + height * width * sizeof(uint16_t);
    std::vector<unsigned char> data(total_byte_count);
    size_t offset = 0;

    // Network data is stored in big-endian, while most host systems are little endian.
    append_big_endian(data, &offset, k_magic_number);

    // The number of bytes written is determined by the argument type, so the dimensions
    // are widened to uint64_t explicitly. Passing size_t directly would emit 4-byte
    // fields on platforms where size_t is 32 bits and break the fixed 24-byte header.
    append_big_endian(data, &offset, static_cast<uint64_t>(width));
    append_big_endian(data, &offset, static_cast<uint64_t>(height));

    // Row-major traversal: all values of row 0, then row 1, and so on.
    for (size_t i = 0; i < height; ++i) {
        for (size_t j = 0; j < width; ++j) {
            append_big_endian(data, &offset, m(i, j));
        }
    }

    return data;
}

// Parses a FORMAT_RAW_DEPTH payload into a 2D depth map of shape {height, width}.
//
// Expected layout (all fields big-endian):
//   [magic number : 8 bytes][width : 8 bytes][height : 8 bytes][depth values : 2 bytes each]
//
// @param data The encoded bytes.
// @return The decoded depth map; axis 0 is height and axis 1 is width.
// @throws Exception if `data` is shorter than the header, if the magic number does not
//         match, or if the payload length is not exactly width * height depth values.
Camera::depth_map Camera::decode_depth_map(const std::vector<unsigned char>& data) {
    if (data.size() < k_header_size) {
        throw Exception("Data too short to contain valid depth information. Size: " +
                        std::to_string(data.size()));
    }

    size_t offset = 0;
    const uint64_t magic_number = read_big_endian<uint64_t>(data, &offset);
    if (magic_number != k_magic_number) {
        throw Exception(
            "Invalid header for a vnd.viam.dep encoded depth image. The data may be corrupted, or "
            "is not a Viam-encoded depth map.");
    }

    const uint64_t width = read_big_endian<uint64_t>(data, &offset);
    const uint64_t height = read_big_endian<uint64_t>(data, &offset);

    // Width and height come straight from the (possibly corrupt or hostile) payload, so
    // `width * height * sizeof(uint16_t)` may wrap around and accidentally equal the real
    // payload size. Validate with divisions, which cannot overflow, instead of multiplying.
    const uint64_t payload_size = data.size() - k_header_size;
    const uint64_t value_count = payload_size / sizeof(uint16_t);
    const bool whole_values = (payload_size % sizeof(uint16_t)) == 0;
    const bool dimensions_match =
        (width == 0) ? (value_count == 0)
                     : (value_count % width == 0 && value_count / width == height);

    if (!whole_values || !dimensions_match) {
        // Only used for the message. Unsigned arithmetic wraps for absurd dimensions,
        // which is exactly why this value is not used for the validation above.
        const uint64_t expected_size = k_header_size + width * height * sizeof(uint16_t);
        throw Exception("Data size does not match width, height, and depth values. Actual size: " +
                        std::to_string(data.size()) +
                        ". Expected size: " + std::to_string(expected_size) +
                        ". Width: " + std::to_string(width) + " Height: " + std::to_string(height));
    }

    // Decode straight into the array rather than staging the values in a temporary
    // std::vector and copying them over. Iterating the xarray visits its elements in the
    // same order the previous std::copy into m.begin() filled them.
    xt::xarray<uint16_t> m = xt::xarray<uint16_t>::from_shape({height, width});
    for (auto& depth_value : m) {
        depth_value = read_big_endian<uint16_t>(data, &offset);
    }

    return m;
}

std::string Camera::normalize_mime_type(const std::string& str) {
std::string mime_type = str;
if (str.size() >= Camera::lazy_suffix.size() &&
Expand Down
41 changes: 41 additions & 0 deletions src/viam/sdk/components/camera.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#include <string>
#include <vector>

#include <boost/endian/conversion.hpp>
#include <xtensor/xarray.hpp>

#include <viam/api/common/v1/common.pb.h>
#include <viam/api/component/camera/v1/camera.pb.h>

Expand Down Expand Up @@ -83,6 +86,44 @@ class Camera : public Component {
response_metadata metadata;
};

/// @brief A depth map: a 2D array of per-pixel depth values.
///
/// depth_map is a type alias for an xtensor array of 16-bit unsigned integers. The
/// encode/decode functions of this class treat it as a 2D array where the first axis is
/// height and the second axis is width; the dimensions are carried by the array's shape
/// rather than stored separately. Each depth value represents the distance from the camera
/// to a point in the scene.
using depth_map = xt::xarray<uint16_t>;

/// Encodes the dimensions and depth values of a depth map into a raw binary format
/// (MIME type FORMAT_RAW_DEPTH).
///
/// This function takes a depth_map, and encodes this information into a binary blob. The binary
/// format consists of a "magic number" header (UTF-8 encoding for 'DEPTHMAP' in big-endian),
/// then the width and height encoded as 64-bit unsigned integers, followed by the depth
/// values encoded as 16-bit unsigned integers (big-endian format), written row by row. This
/// format is suitable for serialization and transmission of depth map data through gRPC.
///
/// @param m The depth map to encode; must be a 2D array (first axis height, second axis width).
/// @return A std::vector<unsigned char> representing the encoded binary data of the depth
/// map.
/// The vector includes 8 bytes for the magic number, 8 bytes for width, 8 bytes for height,
/// followed by 2 bytes per depth value.
/// @throws Exception: if the depth map is not 2D
///
static std::vector<unsigned char> encode_depth_map(const Camera::depth_map& m);

/// Decodes image data of custom MIME type FORMAT_RAW_DEPTH into a depth_map.
///
/// This function processes a binary blob laid out as a 'DEPTHMAP' magic number, a width and
/// a height (each a big-endian 64-bit unsigned integer), followed by big-endian 16-bit depth
/// values, and extracts the dimensions and depth values contained within.
///
/// @param data A vector of unsigned chars representing the binary data of the depth map.
/// @return A depth_map (type alias for an xtensor array) of shape {height, width}.
/// @throws Exception: if the data is misformatted, e.g. it is too short to contain the header,
/// the magic number does not match, or the data size does not match the expected size based
/// on the width and height.
static Camera::depth_map decode_depth_map(const std::vector<unsigned char>& data);

/// @brief Removes any extra suffixes from the MIME type string.
static std::string normalize_mime_type(const std::string& str);

Expand Down
24 changes: 24 additions & 0 deletions src/viam/sdk/tests/test_camera.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,30 @@ BOOST_AUTO_TEST_CASE(test_do_command) {
});
}

// Round-trip check: encoding a depth map and decoding the result must reproduce the
// original shape and every depth value in row-major order.
BOOST_AUTO_TEST_CASE(test_depth_map_encode_decode) {
    // Row-major contents of a depth map with height = 3 and width = 2.
    std::vector<uint16_t> expected_values = {100, 200, 300, 400, 500, 600};

    xt::xarray<uint16_t> depth_map = xt::xarray<uint16_t>::from_shape({3, 2});
    for (size_t row = 0; row < 3; ++row) {
        for (size_t col = 0; col < 2; ++col) {
            depth_map(row, col) = expected_values[row * 2 + col];
        }
    }

    std::vector<unsigned char> data = Camera::encode_depth_map(depth_map);
    auto result_map = Camera::decode_depth_map(data);

    // Check if the dimensions and values match
    const auto& result_shape = result_map.shape();
    BOOST_CHECK_EQUAL(result_shape[0], 3);  // height
    BOOST_CHECK_EQUAL(result_shape[1], 2);  // width

    std::vector<uint16_t> result_values(result_map.begin(), result_map.end());
    BOOST_CHECK_EQUAL_COLLECTIONS(
        result_values.begin(), result_values.end(), expected_values.begin(), expected_values.end());
}

BOOST_AUTO_TEST_SUITE_END()

} // namespace sdktests
Expand Down