Skip to content

Commit

Permalink
Add base implementation and tests for LPAD function without pad texts…
Browse files Browse the repository at this point in the history
… considering string input values
  • Loading branch information
jpedroantunes committed May 3, 2021
1 parent 73927fc commit 585cad3
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 0 deletions.
52 changes: 52 additions & 0 deletions cpp/src/gandiva/precompiled/string_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1489,6 +1489,58 @@ const char* lpad(gdv_int64 context, const char* text, gdv_int32 text_len,
}
}

// LPAD without an explicit fill text: left-pads `text` with blank spaces until the
// result holds `return_length` UTF-8 characters.
//
// - context:       execution context, used for arena allocation and error reporting.
// - text/text_len: input string and its length in bytes.
// - return_length: desired length of the result, in characters (not bytes).
// - out_len:       receives the length of the returned string, in bytes.
//
// Returns an empty string when `text_len` or `return_length` is <= 0, when
// utf8_length() reports zero characters for the input, or when the output buffer
// cannot be allocated (in which case an error message is set on the context).
// When no padding is required the returned pointer is `text` itself (possibly
// with a shorter `out_len`); otherwise it points to arena-allocated memory.
FORCE_INLINE
const char* lpad_no_fill_text(gdv_int64 context, const char* text, gdv_int32 text_len,
                              gdv_int32 return_length, gdv_int32* out_len) {
  // if the text length or the defined return length (number of characters to return)
  // is <=0, then return an empty string.
  if (text_len <= 0 || return_length <= 0) {
    *out_len = 0;
    return "";
  }

  // lengths are compared in utf8 characters, not in bytes
  int32_t text_char_count = utf8_length(context, text, text_len);
  // a zero count for a non-empty input means the text could not be measured
  // (e.g. it holds an invalid utf8 char), so there is nothing sensible to pad
  if (text_char_count == 0) {
    *out_len = 0;
    return "";
  }

  if (return_length == text_char_count) {
    // case where the return length is the same as the text's length: return the
    // text directly, no copy is needed.
    *out_len = text_len;
    return text;
  }

  if (return_length < text_char_count) {
    // case where it truncates the result on return length: keep only the bytes
    // that make up the first `return_length` characters.
    *out_len = utf8_byte_pos(context, text, text_len, return_length);
    return text;
  }

  // case (return_length > text_char_count): blank spaces must be written on the
  // left of the text. A space is a single byte, so the number of padding bytes
  // equals the number of missing characters.
  const int32_t pad_len = return_length - text_char_count;

  // text_len + pad_len must fit in the 32-bit output length; adding them blindly
  // would overflow for very large return lengths. Such requests are reported
  // through the same path as an allocation failure.
  const int32_t max_out_len = 0x7FFFFFFF;
  char* ret = nullptr;
  if (pad_len <= max_out_len - text_len) {
    ret = reinterpret_cast<gdv_binary>(
        gdv_fn_context_arena_malloc(context, text_len + pad_len));
  }
  if (ret == nullptr) {
    gdv_fn_context_set_error_msg(context,
                                 "Could not allocate memory for output string");
    *out_len = 0;
    return "";
  }

  memset(ret, ' ', pad_len);
  memcpy(ret + pad_len, text, text_len);
  *out_len = text_len + pad_len;
  return ret;
}

FORCE_INLINE
const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len,
const char* delimiter, gdv_int32 delim_len, gdv_int32 index,
Expand Down
26 changes: 26 additions & 0 deletions cpp/src/gandiva/precompiled/string_ops_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,7 @@ TEST(TestStringOps, TestLpadString) {
gdv_int32 out_len = 0;
const char* out_str;

// LPAD function tests - with defined fill pad text
out_str = lpad(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "Test");

Expand Down Expand Up @@ -737,6 +738,31 @@ TEST(TestStringOps, TestLpadString) {

out_str = lpad(ctx_ptr, "hello", 5, 6, "д", 2, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "дhello");

// LPAD function tests - with NO pad text
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 4, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "Test");

out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 10, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "TestString");

out_str = lpad_no_fill_text(ctx_ptr, "TestString", 0, 10, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "");

out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 0,&out_len);
EXPECT_EQ(std::string(out_str, out_len), "");

out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, -500, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "");

out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 18, &out_len);
EXPECT_EQ(std::string(out_str, out_len), " TestString");

out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 15, &out_len);
EXPECT_EQ(std::string(out_str, out_len), " TestString");

out_str = lpad_no_fill_text(ctx_ptr, "абвгд", 10, 7, &out_len);
EXPECT_EQ(std::string(out_str, out_len), " абвгд");
}

TEST(TestStringOps, TestRtrim) {
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/gandiva/precompiled/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,9 @@ const char* lpad(gdv_int64 context, const char* text, gdv_int32 text_len,
gdv_int32 return_length, const char* fill_text, gdv_int32 fill_text_len,
gdv_int32* out_len);

// LPAD variant that takes no fill text: left-pads `text` with blank spaces up to
// `return_length` characters (as counted by utf8_length), or truncates `text` to
// that many characters when it is longer. `text_len` is the input length in bytes;
// the byte length of the returned string is written to `out_len`. An empty string
// is returned when `text_len` is 0 or `return_length` is <= 0.
const char* lpad_no_fill_text(gdv_int64 context, const char* text, gdv_int32 text_len,
gdv_int32 return_length, gdv_int32* out_len);

const char* replace_with_max_len_utf8_utf8_utf8(gdv_int64 context, const char* text,
gdv_int32 text_len, const char* from_str,
gdv_int32 from_str_len,
Expand Down

0 comments on commit 585cad3

Please sign in to comment.