Skip to content

Commit

Permalink
DEPR: Series logical ops with different index coercing to bool (pandas-dev#52839)
Browse files Browse the repository at this point in the history

* BUG: Series logical ops with different index coercing to bool

* Turn into a deprecation

* Fix condition

* Make futurewarning more specific
  • Loading branch information
mroeschke authored May 1, 2023
1 parent f886f13 commit 4ea0ce1
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 17 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ Deprecations
- Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`)
- Deprecated unused "closed" and "normalize" keywords in the :class:`DatetimeIndex` constructor (:issue:`52628`)
- Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`)
-
- Deprecated logical operations between two non-boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, the result will maintain the dtype of the inputs. (:issue:`52500`, :issue:`52538`)

.. ---------------------------------------------------------------------------
.. _whatsnew_210.performance:
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5573,6 +5573,18 @@ def _align_for_op(self, right, align_asobject: bool = False):
# avoid repeated alignment
if not left.index.equals(right.index):
if align_asobject:
if left.dtype not in (object, np.bool_) or right.dtype not in (
object,
np.bool_,
):
warnings.warn(
"Operation between non boolean Series with different "
"indexes will no longer return a boolean result in "
"a future version. Cast both Series to object type "
"to maintain the prior behavior.",
FutureWarning,
stacklevel=find_stack_level(),
)
# to keep original value's dtype for bool ops
left = left.astype(object)
right = right.astype(object)
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/frame/test_logical_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,21 @@ def test_logical_ops_categorical_columns(self):
),
)
tm.assert_frame_equal(result, expected)

def test_int_dtype_different_index_not_bool(self):
# GH 52500
df1 = DataFrame([1, 2, 3], index=[10, 11, 23], columns=["a"])
df2 = DataFrame([10, 20, 30], index=[11, 10, 23], columns=["a"])
result = np.bitwise_xor(df1, df2)
expected = DataFrame([21, 8, 29], index=[10, 11, 23], columns=["a"])
tm.assert_frame_equal(result, expected)

result = df1 ^ df2
tm.assert_frame_equal(result, expected)

def test_different_dtypes_different_index_raises(self):
    # GH 52538
    # The two frames share only the row label "b".
    lhs = DataFrame([1, 2], index=["a", "b"])
    rhs = DataFrame([3, 4], index=["b", "c"])
    expected_msg = "unsupported operand type"
    with pytest.raises(TypeError, match=expected_msg):
        lhs & rhs
51 changes: 35 additions & 16 deletions pandas/tests/series/test_logical_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,6 @@ def test_logical_ops_bool_dtype_with_ndarray(self):
def test_logical_operators_int_dtype_with_bool_dtype_and_reindex(self):
# GH#9016: support bitwise op for integer types

# with non-matching indexes, logical operators will cast to object
# before operating
index = list("bca")

s_tft = Series([True, False, True], index=index)
Expand All @@ -229,20 +227,26 @@ def test_logical_operators_int_dtype_with_bool_dtype_and_reindex(self):

# s_0123 will be all false now because of reindexing like s_tft
expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"])
result = s_tft & s_0123
with tm.assert_produces_warning(FutureWarning):
result = s_tft & s_0123
tm.assert_series_equal(result, expected)

# GH 52538: Deprecate casting to object type when reindex is needed;
# matches DataFrame behavior
expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"])
result = s_0123 & s_tft
with tm.assert_produces_warning(FutureWarning):
result = s_0123 & s_tft
tm.assert_series_equal(result, expected)

s_a0b1c0 = Series([1], list("b"))

res = s_tft & s_a0b1c0
with tm.assert_produces_warning(FutureWarning):
res = s_tft & s_a0b1c0
expected = s_tff.reindex(list("abc"))
tm.assert_series_equal(res, expected)

res = s_tft | s_a0b1c0
with tm.assert_produces_warning(FutureWarning):
res = s_tft | s_a0b1c0
expected = s_tft.reindex(list("abc"))
tm.assert_series_equal(res, expected)

Expand Down Expand Up @@ -396,24 +400,27 @@ def test_logical_ops_label_based(self):
tm.assert_series_equal(result, expected)

# vs non-matching
result = a & Series([1], ["z"])
with tm.assert_produces_warning(FutureWarning):
result = a & Series([1], ["z"])
expected = Series([False, False, False, False], list("abcz"))
tm.assert_series_equal(result, expected)

result = a | Series([1], ["z"])
with tm.assert_produces_warning(FutureWarning):
result = a | Series([1], ["z"])
expected = Series([True, True, False, False], list("abcz"))
tm.assert_series_equal(result, expected)

# identity
# we would like s[s|e] == s to hold for any e, whether empty or not
for e in [
empty.copy(),
Series([1], ["z"]),
Series(np.nan, b.index),
Series(np.nan, a.index),
]:
result = a[a | e]
tm.assert_series_equal(result, a[a])
with tm.assert_produces_warning(FutureWarning):
for e in [
empty.copy(),
Series([1], ["z"]),
Series(np.nan, b.index),
Series(np.nan, a.index),
]:
result = a[a | e]
tm.assert_series_equal(result, a[a])

for e in [Series(["z"])]:
result = a[a | e]
Expand Down Expand Up @@ -496,3 +503,15 @@ def test_logical_ops_df_compat(self):

tm.assert_frame_equal(s3.to_frame() | s4.to_frame(), exp_or1.to_frame())
tm.assert_frame_equal(s4.to_frame() | s3.to_frame(), exp_or.to_frame())

@pytest.mark.xfail(reason="Will pass once #52839 deprecation is enforced")
def test_int_dtype_different_index_not_bool(self):
    # GH 52500
    # Both Series carry the same labels, but in a different order.
    left = Series([1, 2, 3], index=[10, 11, 23], name="a")
    right = Series([10, 20, 30], index=[11, 10, 23], name="a")
    # Values paired by label: 1 ^ 20, 2 ^ 10, 3 ^ 30.
    expected = Series([21, 8, 29], index=[10, 11, 23], name="a")

    # The NumPy ufunc and the ``^`` operator are checked against the
    # same expected Series.
    via_ufunc = np.bitwise_xor(left, right)
    tm.assert_series_equal(via_ufunc, expected)

    via_operator = left ^ right
    tm.assert_series_equal(via_operator, expected)

0 comments on commit 4ea0ce1

Please sign in to comment.