From ab0bb3f627ac5fab4e8bcb24113f737d1ac6218c Mon Sep 17 00:00:00 2001 From: ajpotts Date: Wed, 31 Jan 2024 13:16:47 -0500 Subject: [PATCH] Closes #2918 add isempty for pdarray (#2933) Co-authored-by: Amanda Potts --- PROTO_tests/tests/string_test.py | 78 ++++++++++++++++++++++++++++++++ arkouda/strings.py | 39 ++++++++++++++++ src/SegmentedComputation.chpl | 4 ++ src/SegmentedMsg.chpl | 4 ++ src/SegmentedString.chpl | 15 ++++++ tests/string_test.py | 76 +++++++++++++++++++++++++++++++ 6 files changed, 216 insertions(+) diff --git a/PROTO_tests/tests/string_test.py b/PROTO_tests/tests/string_test.py index fef71b2ed19..e30a47cc2d3 100644 --- a/PROTO_tests/tests/string_test.py +++ b/PROTO_tests/tests/string_test.py @@ -576,6 +576,84 @@ def test_string_isdigit(self): assert example2.isdigit().to_list() == expected + def test_string_empty(self): + not_empty = ak.array([f"Strings {i}" for i in range(3)]) + empty = ak.array(["" for i in range(3)]) + example = ak.concatenate([not_empty, empty]) + + assert example.isempty().to_list() == [False, False, False, True, True, True] + + example2 = ak.array( + [ + "", + "string1", + "stringA", + "String", + "12345", + "Hello\tWorld", + " ", + "\n", + "3.14", + "\u0030", + "\u00B2", + ] + ) + + expected = [ + True, + False, + False, + False, + False, + False, + False, + False, + False, + False, + False, + ] + + assert example2.isempty().to_list() == expected + + def test_string_empty_percent_prefix(self): + not_empty = ak.array([f"%Strings {i}" for i in range(3)]) + empty = ak.array(["" for i in range(3)]) + example = ak.concatenate([not_empty, empty]) + + assert example.isempty().to_list() == [False, False, False, True, True, True] + + example2 = ak.array( + [ + "", + "string1", + "stringA", + "String", + "12345", + "Hello\tWorld", + " ", + "\n", + "3.14", + "\u0030", + "\u00B2", + ] + ) + + expected = [ + True, + False, + False, + False, + False, + False, + False, + False, + False, + False, + False, + ] + + assert example2.isempty().to_list() 
== expected + def test_where(self): revs = ak.arange(10) % 2 == 0 s1 = ak.array([f"str {i}" for i in range(10)]) diff --git a/arkouda/strings.py b/arkouda/strings.py index b2509f5ba6e..c7953619ac9 100644 --- a/arkouda/strings.py +++ b/arkouda/strings.py @@ -814,6 +814,45 @@ def isdigit(self) -> pdarray: ) ) + def isempty(self) -> pdarray: + """ + Returns a boolean pdarray where index i indicates whether string i of the + Strings is empty. + + + True for elements that are the empty string, False otherwise + + Returns + ------- + pdarray, bool + True for elements that are the empty string, False otherwise + + Raises + ------ + RuntimeError + Raised if there is a server-side error thrown + + See Also + -------- + Strings.islower + Strings.isupper + Strings.istitle + + Examples + -------- + >>> not_empty = ak.array([f'Strings {i}' for i in range(3)]) + >>> empty = ak.array(['' for i in range(3)]) + >>> strings = ak.concatenate([not_empty, empty]) + >>> strings + array(['Strings 0', 'Strings 1', 'Strings 2', '', '', '']) + >>> strings.isempty() + """ + return create_pdarray( + generic_msg( + cmd="checkChars", args={"subcmd": "isempty", "objType": self.objType, "obj": self.entry} + ) + ) + @typechecked def strip(self, chars: Optional[Union[bytes, str_scalars]] = "") -> Strings: """ diff --git a/src/SegmentedComputation.chpl b/src/SegmentedComputation.chpl index 161b2eba87b..ab988931560 100644 --- a/src/SegmentedComputation.chpl +++ b/src/SegmentedComputation.chpl @@ -51,6 +51,7 @@ module SegmentedComputation { StringIsAlphaNumeric, StringIsAlphabetic, StringIsDigit, + StringIsEmpty, } proc computeOnSegments(segments: [?D] int, ref values: [?vD] ?t, param function: SegFunction, type retType, const strArg: string = "") throws { @@ -120,6 +121,9 @@ module SegmentedComputation { when SegFunction.StringIsDigit { agg.copy(res[i], stringIsDigit(values, start..#len)); } + when SegFunction.StringIsEmpty { + agg.copy(res[i], stringIsEmpty(values, start..#len)); + } otherwise { 
compilerError("Unrecognized segmented function"); } diff --git a/src/SegmentedMsg.chpl b/src/SegmentedMsg.chpl index 70da9c8eb9e..997a25d6c4c 100644 --- a/src/SegmentedMsg.chpl +++ b/src/SegmentedMsg.chpl @@ -297,6 +297,10 @@ module SegmentedMsg { truth.a = strings.isdigit(); repMsg = "created "+st.attrib(rname); } + when "isempty" { + truth.a = strings.isempty(); + repMsg = "created "+st.attrib(rname); + } otherwise { var errorMsg = notImplementedError(pn, "%s".doFormat(subcmd)); smLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); diff --git a/src/SegmentedString.chpl b/src/SegmentedString.chpl index 084c5997791..e52cd1de8ab 100644 --- a/src/SegmentedString.chpl +++ b/src/SegmentedString.chpl @@ -517,6 +517,14 @@ module SegmentedString { return computeOnSegments(offsets.a, values.a, SegFunction.StringIsDigit, bool); } + /* + Returns list of bools where index i indicates whether the string i of the SegString is empty + :returns: [domain] bool where index i indicates whether the string i of the SegString is empty + */ + proc isempty() throws { + return computeOnSegments(offsets.a, values.a, SegFunction.StringIsEmpty, bool); + } + proc bytesToUintArr(const max_bytes:int, lens: [?D] ?t, st) throws { // bytes contained in strings < 128 bits, so concatenating is better than the hash ref off = offsets.a; @@ -1479,6 +1487,13 @@ module SegmentedString { return interpretAsString(values, rng, borrow=true).isDigit(); } + /* + The SegFunction called by computeOnSegments for isempty + */ + inline proc stringIsEmpty(ref values, rng) throws { + return interpretAsString(values, rng, borrow=true).isEmpty(); + } + inline proc stringBytesToUintArr(ref values, rng) throws { var localSlice = new lowLevelLocalizingSlice(values, rng); return | reduce [i in 0..#rng.size] (localSlice.ptr(i):uint)<<(8*(rng.size-1-i)); diff --git a/tests/string_test.py b/tests/string_test.py index 5e56cf5df3f..0dc8b9a5578 100644 --- a/tests/string_test.py +++ b/tests/string_test.py @@ 
-662,6 +662,82 @@ def test_string_isdigit(self): self.assertListEqual(example2.isdigit().to_list(), expected) + def test_string_empty(self): + not_empty = ak.array([f"Strings {i}" for i in range(3)]) + empty = ak.array(["" for i in range(3)]) + example = ak.concatenate([not_empty, empty]) + self.assertListEqual(example.isempty().to_list(), [False, False, False, True, True, True]) + + example2 = ak.array( + [ + "", + "string1", + "stringA", + "String", + "12345", + "Hello\tWorld", + " ", + "\n", + "3.14", + "\u0030", + "\u00B2", + ] + ) + + expected = [ + True, + False, + False, + False, + False, + False, + False, + False, + False, + False, + False, + ] + + self.assertListEqual(example2.isempty().to_list(), expected) + + def test_string_empty_percent_prefix(self): + not_empty = ak.array([f"%Strings {i}" for i in range(3)]) + empty = ak.array(["" for i in range(3)]) + example = ak.concatenate([not_empty, empty]) + self.assertListEqual(example.isempty().to_list(), [False, False, False, True, True, True]) + + example2 = ak.array( + [ + "", + "string1", + "stringA", + "String", + "12345", + "Hello\tWorld", + " ", + "\n", + "3.14", + "\u0030", + "\u00B2", + ] + ) + + expected = [ + True, + False, + False, + False, + False, + False, + False, + False, + False, + False, + False, + ] + + self.assertListEqual(example2.isempty().to_list(), expected) + def test_where(self): revs = ak.arange(10) % 2 == 0 s1 = ak.array([f"str {i}" for i in range(10)])