diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 629e74650beac..b2c64a8242de2 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -904,9 +904,13 @@ def __hash__(self):
         "pyspark.pandas.tests.groupby.test_rank",
         "pyspark.pandas.tests.groupby.test_size",
         "pyspark.pandas.tests.groupby.test_split_apply",
-        "pyspark.pandas.tests.groupby.test_split_apply_adv",
-        "pyspark.pandas.tests.groupby.test_split_apply_basic",
+        "pyspark.pandas.tests.groupby.test_split_apply_count",
+        "pyspark.pandas.tests.groupby.test_split_apply_first",
+        "pyspark.pandas.tests.groupby.test_split_apply_last",
         "pyspark.pandas.tests.groupby.test_split_apply_min_max",
+        "pyspark.pandas.tests.groupby.test_split_apply_skew",
+        "pyspark.pandas.tests.groupby.test_split_apply_std",
+        "pyspark.pandas.tests.groupby.test_split_apply_var",
         "pyspark.pandas.tests.groupby.test_stat",
         "pyspark.pandas.tests.groupby.test_stat_adv",
         "pyspark.pandas.tests.groupby.test_stat_ddof",
@@ -1180,9 +1184,13 @@ def __hash__(self):
         "pyspark.pandas.tests.connect.groupby.test_parity_cumulative",
         "pyspark.pandas.tests.connect.groupby.test_parity_missing_data",
         "pyspark.pandas.tests.connect.groupby.test_parity_split_apply",
-        "pyspark.pandas.tests.connect.groupby.test_parity_split_apply_adv",
-        "pyspark.pandas.tests.connect.groupby.test_parity_split_apply_basic",
+        "pyspark.pandas.tests.connect.groupby.test_parity_split_apply_count",
+        "pyspark.pandas.tests.connect.groupby.test_parity_split_apply_first",
+        "pyspark.pandas.tests.connect.groupby.test_parity_split_apply_last",
         "pyspark.pandas.tests.connect.groupby.test_parity_split_apply_min_max",
+        "pyspark.pandas.tests.connect.groupby.test_parity_split_apply_skew",
+        "pyspark.pandas.tests.connect.groupby.test_parity_split_apply_std",
+        "pyspark.pandas.tests.connect.groupby.test_parity_split_apply_var",
         "pyspark.pandas.tests.connect.series.test_parity_datetime",
         "pyspark.pandas.tests.connect.series.test_parity_string_ops_adv",
         "pyspark.pandas.tests.connect.series.test_parity_string_ops_basic",
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_basic.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_count.py
similarity index 87%
rename from python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_basic.py
rename to python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_count.py
index 2964213ab484b..3e7931d1b5a0b 100644
--- a/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_basic.py
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_count.py
@@ -16,13 +16,13 @@
 #
 import unittest
 
-from pyspark.pandas.tests.groupby.test_split_apply_basic import GroupbySplitApplyBasicMixin
+from pyspark.pandas.tests.groupby.test_split_apply_count import GroupbySplitApplyCountMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
 from pyspark.testing.pandasutils import PandasOnSparkTestUtils
 
 
-class GroupbySplitApplyBasicParityTests(
-    GroupbySplitApplyBasicMixin,
+class GroupbySplitApplyCountParityTests(
+    GroupbySplitApplyCountMixin,
     PandasOnSparkTestUtils,
     ReusedConnectTestCase,
 ):
@@ -30,7 +30,7 @@ class GroupbySplitApplyBasicParityTests(
 
 
 if __name__ == "__main__":
-    from pyspark.pandas.tests.connect.groupby.test_parity_split_apply_basic import *  # noqa
+    from pyspark.pandas.tests.connect.groupby.test_parity_split_apply_count import *  # noqa
 
     try:
         import xmlrunner  # type: ignore[import]
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_first.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_first.py
new file mode 100644
index 0000000000000..c15f57da1cb79
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_first.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.groupby.test_split_apply_first import GroupbySplitApplyFirstMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class GroupbySplitApplyFirstParityTests(
+    GroupbySplitApplyFirstMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.groupby.test_parity_split_apply_first import *  # noqa
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_last.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_last.py
new file mode 100644
index 0000000000000..474e6d01a8a27
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_last.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.groupby.test_split_apply_last import GroupbySplitApplyLastMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class GroupbySplitApplyLastParityTests(
+    GroupbySplitApplyLastMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.groupby.test_parity_split_apply_last import *  # noqa
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_skew.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_skew.py
new file mode 100644
index 0000000000000..746480a7fe9d4
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_skew.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.groupby.test_split_apply_skew import GroupbySplitApplySkewMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class GroupbySplitApplySkewParityTests(
+    GroupbySplitApplySkewMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.groupby.test_parity_split_apply_skew import *  # noqa
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_adv.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_std.py
similarity index 87%
rename from python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_adv.py
rename to python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_std.py
index f8ddd8b8c9ab4..cd7457ed41035 100644
--- a/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_adv.py
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_std.py
@@ -16,13 +16,13 @@
 #
 import unittest
 
-from pyspark.pandas.tests.groupby.test_split_apply_adv import GroupbySplitApplyAdvMixin
+from pyspark.pandas.tests.groupby.test_split_apply_std import GroupbySplitApplyStdMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
 from pyspark.testing.pandasutils import PandasOnSparkTestUtils
 
 
-class GroupbySplitApplyAdvParityTests(
-    GroupbySplitApplyAdvMixin,
+class GroupbySplitApplyStdParityTests(
+    GroupbySplitApplyStdMixin,
     PandasOnSparkTestUtils,
     ReusedConnectTestCase,
 ):
@@ -30,7 +30,7 @@ class GroupbySplitApplyAdvParityTests(
 
 
 if __name__ == "__main__":
-    from pyspark.pandas.tests.connect.groupby.test_parity_split_apply_adv import *  # noqa
+    from pyspark.pandas.tests.connect.groupby.test_parity_split_apply_std import *  # noqa
 
     try:
         import xmlrunner  # type: ignore[import]
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_var.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_var.py
new file mode 100644
index 0000000000000..642e84dee839d
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply_var.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.groupby.test_split_apply_var import GroupbySplitApplyVarMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class GroupbySplitApplyVarParityTests(
+    GroupbySplitApplyVarMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.groupby.test_parity_split_apply_var import *  # noqa
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/groupby/test_split_apply_count.py b/python/pyspark/pandas/tests/groupby/test_split_apply_count.py
new file mode 100644
index 0000000000000..7d13fe04c63e4
--- /dev/null
+++ b/python/pyspark/pandas/tests/groupby/test_split_apply_count.py
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+from pyspark.pandas.tests.groupby.test_split_apply import GroupbySplitApplyTestingFuncMixin
+
+
+class GroupbySplitApplyCountMixin(GroupbySplitApplyTestingFuncMixin):
+    def test_split_apply_combine_on_series(self):
+        funcs = [
+            ((True, False), ["count"]),
+        ]
+        self._test_split_apply_func(funcs)
+
+
+class GroupbySplitApplyCountTests(
+    GroupbySplitApplyCountMixin,
+    PandasOnSparkTestCase,
+    SQLTestUtils,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.groupby.test_split_apply_count import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/groupby/test_split_apply_first.py b/python/pyspark/pandas/tests/groupby/test_split_apply_first.py
new file mode 100644
index 0000000000000..cb44b31e8a068
--- /dev/null
+++ b/python/pyspark/pandas/tests/groupby/test_split_apply_first.py
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+from pyspark.pandas.tests.groupby.test_split_apply import GroupbySplitApplyTestingFuncMixin
+
+
+class GroupbySplitApplyFirstMixin(GroupbySplitApplyTestingFuncMixin):
+    def test_split_apply_combine_on_series(self):
+        funcs = [
+            ((True, False), ["first"]),
+        ]
+        self._test_split_apply_func(funcs)
+
+
+class GroupbySplitApplyFirstTests(
+    GroupbySplitApplyFirstMixin,
+    PandasOnSparkTestCase,
+    SQLTestUtils,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.groupby.test_split_apply_first import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/groupby/test_split_apply_basic.py b/python/pyspark/pandas/tests/groupby/test_split_apply_last.py
similarity index 85%
rename from python/pyspark/pandas/tests/groupby/test_split_apply_basic.py
rename to python/pyspark/pandas/tests/groupby/test_split_apply_last.py
index 17c6179d19acb..4d9f3628634a8 100644
--- a/python/pyspark/pandas/tests/groupby/test_split_apply_basic.py
+++ b/python/pyspark/pandas/tests/groupby/test_split_apply_last.py
@@ -21,16 +21,16 @@
 from pyspark.pandas.tests.groupby.test_split_apply import GroupbySplitApplyTestingFuncMixin
 
 
-class GroupbySplitApplyBasicMixin(GroupbySplitApplyTestingFuncMixin):
+class GroupbySplitApplyLastMixin(GroupbySplitApplyTestingFuncMixin):
     def test_split_apply_combine_on_series(self):
         funcs = [
-            ((True, False), ["count", "first", "last"]),
+            ((True, False), ["last"]),
         ]
         self._test_split_apply_func(funcs)
 
 
-class GroupbySplitApplyBasicTests(
-    GroupbySplitApplyBasicMixin,
+class GroupbySplitApplyLastTests(
+    GroupbySplitApplyLastMixin,
     PandasOnSparkTestCase,
     SQLTestUtils,
 ):
@@ -38,7 +38,7 @@ class GroupbySplitApplyBasicTests(
 
 
 if __name__ == "__main__":
-    from pyspark.pandas.tests.groupby.test_split_apply_basic import *  # noqa: F401
+    from pyspark.pandas.tests.groupby.test_split_apply_last import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/pandas/tests/groupby/test_split_apply_skew.py b/python/pyspark/pandas/tests/groupby/test_split_apply_skew.py
new file mode 100644
index 0000000000000..8af555bdbb73b
--- /dev/null
+++ b/python/pyspark/pandas/tests/groupby/test_split_apply_skew.py
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+from pyspark.pandas.tests.groupby.test_split_apply import GroupbySplitApplyTestingFuncMixin
+
+
+class GroupbySplitApplySkewMixin(GroupbySplitApplyTestingFuncMixin):
+    def test_split_apply_combine_on_series(self):
+        funcs = [
+            ((False, False), ["skew"]),
+        ]
+        self._test_split_apply_func(funcs)
+
+
+class GroupbySplitApplySkewTests(
+    GroupbySplitApplySkewMixin,
+    PandasOnSparkTestCase,
+    SQLTestUtils,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.groupby.test_split_apply_skew import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/groupby/test_split_apply_adv.py b/python/pyspark/pandas/tests/groupby/test_split_apply_std.py
similarity index 85%
rename from python/pyspark/pandas/tests/groupby/test_split_apply_adv.py
rename to python/pyspark/pandas/tests/groupby/test_split_apply_std.py
index abce6d5ed4f04..c215478bb8372 100644
--- a/python/pyspark/pandas/tests/groupby/test_split_apply_adv.py
+++ b/python/pyspark/pandas/tests/groupby/test_split_apply_std.py
@@ -21,16 +21,16 @@
 from pyspark.pandas.tests.groupby.test_split_apply import GroupbySplitApplyTestingFuncMixin
 
 
-class GroupbySplitApplyAdvMixin(GroupbySplitApplyTestingFuncMixin):
+class GroupbySplitApplyStdMixin(GroupbySplitApplyTestingFuncMixin):
     def test_split_apply_combine_on_series(self):
         funcs = [
-            ((False, False), ["var", "std", "skew"]),
+            ((False, False), ["std"]),
         ]
         self._test_split_apply_func(funcs)
 
 
-class GroupbySplitApplyAdvTests(
-    GroupbySplitApplyAdvMixin,
+class GroupbySplitApplyStdTests(
+    GroupbySplitApplyStdMixin,
     PandasOnSparkTestCase,
     SQLTestUtils,
 ):
@@ -38,7 +38,7 @@ class GroupbySplitApplyAdvTests(
 
 
 if __name__ == "__main__":
-    from pyspark.pandas.tests.groupby.test_split_apply_adv import *  # noqa: F401
+    from pyspark.pandas.tests.groupby.test_split_apply_std import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/pandas/tests/groupby/test_split_apply_var.py b/python/pyspark/pandas/tests/groupby/test_split_apply_var.py
new file mode 100644
index 0000000000000..314f7b9449572
--- /dev/null
+++ b/python/pyspark/pandas/tests/groupby/test_split_apply_var.py
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+from pyspark.pandas.tests.groupby.test_split_apply import GroupbySplitApplyTestingFuncMixin
+
+
+class GroupbySplitApplyVarMixin(GroupbySplitApplyTestingFuncMixin):
+    def test_split_apply_combine_on_series(self):
+        funcs = [
+            ((False, False), ["var"]),
+        ]
+        self._test_split_apply_func(funcs)
+
+
+class GroupbySplitApplyVarTests(
+    GroupbySplitApplyVarMixin,
+    PandasOnSparkTestCase,
+    SQLTestUtils,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.groupby.test_split_apply_var import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)