Skip to content

Commit

Permalink
Add bit_length
Browse files Browse the repository at this point in the history
  • Loading branch information
acvictor committed Mar 4, 2024
1 parent 4b0c74a commit 3a215db
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 0 deletions.
5 changes: 5 additions & 0 deletions velox/docs/functions/spark/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ Unless specified otherwise, all functions return NULL if at least one of the arg
Returns unicode code point of the first character of ``string``. Returns 0 if ``string`` is empty.

.. spark:function:: bit_length(string) -> integer
Returns the length of ``string`` in bits, i.e. the number of bytes it occupies multiplied by 8. Accepts both VARCHAR and VARBINARY input. ::
SELECT bit_length('123'); -- 24

.. spark:function:: chr(n) -> varchar
Returns the Unicode code point ``n`` as a single character string.
Expand Down
5 changes: 5 additions & 0 deletions velox/functions/sparksql/Register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ void registerFunctions(const std::string& prefix) {
registerFunction<sparksql::LeftFunction, Varchar, Varchar, int32_t>(
{prefix + "left"});

registerFunction<sparksql::BitLengthVarcharFunction, int32_t, Varchar>(
{prefix + "bit_length"});
registerFunction<sparksql::BitLengthVarbinaryFunction, int32_t, Varbinary>(
{prefix + "bit_length"});

exec::registerStatefulVectorFunction(
prefix + "instr", instrSignatures(), makeInstr);
exec::registerStatefulVectorFunction(
Expand Down
22 changes: 22 additions & 0 deletions velox/functions/sparksql/String.h
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,28 @@ struct LeftFunction {
}
};

/// bit_length(varchar) -> integer
///
/// Sets the result to the size of the input, as reported by its size()
/// accessor, multiplied by 8. Multi-byte characters therefore contribute
/// 8 bits for every byte they occupy rather than 8 bits per character.
template <typename T>
struct BitLengthVarcharFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE void call(
      out_type<int32_t>& result,
      const arg_type<Varchar>& input) {
    constexpr int kBitsPerByte = 8;
    const auto numBytes = input.size();
    result = numBytes * kBitsPerByte;
  }
};

/// bit_length(varbinary) -> integer
///
/// Sets the result to the size of the binary input, as reported by its
/// size() accessor, multiplied by 8.
template <typename T>
struct BitLengthVarbinaryFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE void call(
      out_type<int32_t>& result,
      const arg_type<Varbinary>& input) {
    constexpr int kBitsPerByte = 8;
    const auto numBytes = input.size();
    result = numBytes * kBitsPerByte;
  }
};

/// translate(string, match, replace) -> varchar
///
/// Returns a new translated string. It translates the character in ``string``
Expand Down
31 changes: 31 additions & 0 deletions velox/functions/sparksql/tests/StringTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ class StringTest : public SparkFunctionBaseTest {
"length(c0)", {arg}, {VARBINARY()});
}

// Evaluates the expression "bit_length(c0)" once, passing `arg` as the sole
// input, and returns the optional integer result.
std::optional<int32_t> bit_length_varchar(std::optional<std::string> arg) {
  const auto bits = evaluateOnce<int32_t>("bit_length(c0)", arg);
  return bits;
}

// Evaluates bit_length on `arg` after an explicit cast of the input column to
// varbinary inside the expression, and returns the optional integer result.
std::optional<int32_t> bit_length_varbinary(std::optional<std::string> arg) {
  const auto bits = evaluateOnce<int32_t, std::string>(
      "bit_length(cast(c0 as varbinary))", arg);
  return bits;
}

// Evaluates the expression "trim(c0)" once, passing `srcStr` as the sole
// input, and returns the optional string result.
std::optional<std::string> trim(std::optional<std::string> srcStr) {
  auto trimmed = evaluateOnce<std::string>("trim(c0)", srcStr);
  return trimmed;
}
Expand Down Expand Up @@ -284,6 +293,7 @@ TEST_F(StringTest, LengthString) {
EXPECT_EQ(length(std::string("\0", 1)), 1);
EXPECT_EQ(length("1"), 1);
EXPECT_EQ(length("😋"), 1);
EXPECT_EQ(length("😋😋"), 2);
// Consists of five codepoints.
EXPECT_EQ(length(kWomanFacepalmingLightSkinTone), 5);
EXPECT_EQ(length("1234567890abdef"), 15);
Expand All @@ -298,6 +308,27 @@ TEST_F(StringTest, LengthBytes) {
EXPECT_EQ(length_bytes("1234567890abdef"), 15);
}

// Checks bit_length on VARCHAR input. Every expected value is 8 times the
// number of bytes in the string, not the number of characters.
TEST_F(StringTest, BitLengthVarchar) {
  // NULL input is expected to yield NULL.
  EXPECT_EQ(bit_length_varchar(std::nullopt), std::nullopt);
  EXPECT_EQ(bit_length_varchar(""), 0);
  // A string holding a single NUL byte still has one byte of content.
  EXPECT_EQ(bit_length_varchar(std::string("\0", 1)), 8);
  EXPECT_EQ(bit_length_varchar("1"), 8);
  EXPECT_EQ(bit_length_varchar("123"), 24);
  // One character occupying four bytes.
  EXPECT_EQ(bit_length_varchar("😋"), 32);
  // Five codepoints occupying 17 bytes in total (17 * 8 = 136).
  EXPECT_EQ(bit_length_varchar(kWomanFacepalmingLightSkinTone), 136);
  EXPECT_EQ(bit_length_varchar("\U0001F408"), 32);
}

// Checks bit_length on VARBINARY input. Every expected value is 8 times the
// number of bytes in the input.
TEST_F(StringTest, BitLengthVarbinary) {
  // NULL input is expected to yield NULL.
  EXPECT_EQ(bit_length_varbinary(std::nullopt), std::nullopt);
  EXPECT_EQ(bit_length_varbinary(""), 0);
  // A single NUL byte still counts as one byte of content.
  EXPECT_EQ(bit_length_varbinary(std::string("\0", 1)), 8);
  EXPECT_EQ(bit_length_varbinary("1"), 8);
  EXPECT_EQ(bit_length_varbinary("123"), 24);
  // Four bytes of content.
  EXPECT_EQ(bit_length_varbinary("😋"), 32);
  // 17 bytes of content (17 * 8 = 136).
  EXPECT_EQ(bit_length_varbinary(kWomanFacepalmingLightSkinTone), 136);
  EXPECT_EQ(bit_length_varbinary("\U0001F408"), 32);
}

TEST_F(StringTest, MD5) {
EXPECT_EQ(md5(std::nullopt), std::nullopt);
EXPECT_EQ(md5(""), "d41d8cd98f00b204e9800998ecf8427e");
Expand Down

0 comments on commit 3a215db

Please sign in to comment.