diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index ba61287fac271..3d61dc0709d93 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -258,7 +258,7 @@ Deprecations - Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`) - Deprecated unused "closed" and "normalize" keywords in the :class:`DatetimeIndex` constructor (:issue:`52628`) - Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`) -- +- Deprecated logical operations between two non-boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`) .. --------------------------------------------------------------------------- .. _whatsnew_210.performance: diff --git a/pandas/core/series.py b/pandas/core/series.py index 2b71eb4a9480d..f4c2509b44d25 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5573,6 +5573,18 @@ def _align_for_op(self, right, align_asobject: bool = False): # avoid repeated alignment if not left.index.equals(right.index): if align_asobject: + if left.dtype not in (object, np.bool_) or right.dtype not in ( + object, + np.bool_, + ): + warnings.warn( + "Operation between non boolean Series with different " + "indexes will no longer return a boolean result in " + "a future version. 
Cast both Series to object type " + "to maintain the prior behavior.", + FutureWarning, + stacklevel=find_stack_level(), + ) # to keep original value's dtype for bool ops left = left.astype(object) right = right.astype(object) diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py index f509ae52ad5a5..a58b79b5db111 100644 --- a/pandas/tests/frame/test_logical_ops.py +++ b/pandas/tests/frame/test_logical_ops.py @@ -189,3 +189,21 @@ def test_logical_ops_categorical_columns(self): ), ) tm.assert_frame_equal(result, expected) + + def test_int_dtype_different_index_not_bool(self): + # GH 52500 + df1 = DataFrame([1, 2, 3], index=[10, 11, 23], columns=["a"]) + df2 = DataFrame([10, 20, 30], index=[11, 10, 23], columns=["a"]) + result = np.bitwise_xor(df1, df2) + expected = DataFrame([21, 8, 29], index=[10, 11, 23], columns=["a"]) + tm.assert_frame_equal(result, expected) + + result = df1 ^ df2 + tm.assert_frame_equal(result, expected) + + def test_different_dtypes_different_index_raises(self): + # GH 52538 + df1 = DataFrame([1, 2], index=["a", "b"]) + df2 = DataFrame([3, 4], index=["b", "c"]) + with pytest.raises(TypeError, match="unsupported operand type"): + df1 & df2 diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index ccd934c2f17bb..19412db91b487 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -217,8 +217,6 @@ def test_logical_ops_bool_dtype_with_ndarray(self): def test_logical_operators_int_dtype_with_bool_dtype_and_reindex(self): # GH#9016: support bitwise op for integer types - # with non-matching indexes, logical operators will cast to object - # before operating index = list("bca") s_tft = Series([True, False, True], index=index) @@ -229,20 +227,26 @@ def test_logical_operators_int_dtype_with_bool_dtype_and_reindex(self): # s_0123 will be all false now because of reindexing like s_tft expected = Series([False] * 7, index=[0, 
1, 2, 3, "a", "b", "c"]) - result = s_tft & s_0123 + with tm.assert_produces_warning(FutureWarning): + result = s_tft & s_0123 tm.assert_series_equal(result, expected) + # GH 52538: Deprecate casting to object type when reindex is needed; + # matches DataFrame behavior expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"]) - result = s_0123 & s_tft + with tm.assert_produces_warning(FutureWarning): + result = s_0123 & s_tft tm.assert_series_equal(result, expected) s_a0b1c0 = Series([1], list("b")) - res = s_tft & s_a0b1c0 + with tm.assert_produces_warning(FutureWarning): + res = s_tft & s_a0b1c0 expected = s_tff.reindex(list("abc")) tm.assert_series_equal(res, expected) - res = s_tft | s_a0b1c0 + with tm.assert_produces_warning(FutureWarning): + res = s_tft | s_a0b1c0 expected = s_tft.reindex(list("abc")) tm.assert_series_equal(res, expected) @@ -396,24 +400,27 @@ def test_logical_ops_label_based(self): tm.assert_series_equal(result, expected) # vs non-matching - result = a & Series([1], ["z"]) + with tm.assert_produces_warning(FutureWarning): + result = a & Series([1], ["z"]) expected = Series([False, False, False, False], list("abcz")) tm.assert_series_equal(result, expected) - result = a | Series([1], ["z"]) + with tm.assert_produces_warning(FutureWarning): + result = a | Series([1], ["z"]) expected = Series([True, True, False, False], list("abcz")) tm.assert_series_equal(result, expected) # identity # we would like s[s|e] == s to hold for any e, whether empty or not - for e in [ - empty.copy(), - Series([1], ["z"]), - Series(np.nan, b.index), - Series(np.nan, a.index), - ]: - result = a[a | e] - tm.assert_series_equal(result, a[a]) + with tm.assert_produces_warning(FutureWarning): + for e in [ + empty.copy(), + Series([1], ["z"]), + Series(np.nan, b.index), + Series(np.nan, a.index), + ]: + result = a[a | e] + tm.assert_series_equal(result, a[a]) for e in [Series(["z"])]: result = a[a | e] @@ -496,3 +503,15 @@ def test_logical_ops_df_compat(self): 
tm.assert_frame_equal(s3.to_frame() | s4.to_frame(), exp_or1.to_frame()) tm.assert_frame_equal(s4.to_frame() | s3.to_frame(), exp_or.to_frame()) + + @pytest.mark.xfail(reason="Will pass once #52839 deprecation is enforced") + def test_int_dtype_different_index_not_bool(self): + # GH 52500 + ser1 = Series([1, 2, 3], index=[10, 11, 23], name="a") + ser2 = Series([10, 20, 30], index=[11, 10, 23], name="a") + result = np.bitwise_xor(ser1, ser2) + expected = Series([21, 8, 29], index=[10, 11, 23], name="a") + tm.assert_series_equal(result, expected) + + result = ser1 ^ ser2 + tm.assert_series_equal(result, expected)