From fa29a133132f0e8af115e3235b03d6aa96204bd9 Mon Sep 17 00:00:00 2001 From: Gianluca Rossi Date: Fri, 21 Aug 2015 13:50:38 +0100 Subject: [PATCH] BUG: Concat of Series w/o names. Closes #10698 --- doc/source/merging.rst | 19 ++++++++++++++++++- doc/source/whatsnew/v0.17.0.txt | 24 ++++++++++++++++++++++++ pandas/tools/merge.py | 14 ++++++++++---- pandas/tools/tests/test_merge.py | 20 +++++++++++++++++++- 4 files changed, 71 insertions(+), 6 deletions(-) diff --git a/doc/source/merging.rst b/doc/source/merging.rst index c62647010a131..5cb786d77cd1e 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -352,7 +352,24 @@ Passing ``ignore_index=True`` will drop all name references. More concatenating with group keys ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Let's consider a variation on the first example presented: +A fairly common use of the ``keys`` argument is to override the column names when creating a new DataFrame based on existing Series. +Notice how the default behaviour is to let the resulting DataFrame inherit the parent Series' name, when one exists. + +.. ipython:: python + + s3 = pd.Series([0, 1, 2, 3], name='foo') + s4 = pd.Series([0, 1, 2, 3]) + s5 = pd.Series([0, 1, 4, 5]) + + pd.concat([s3, s4, s5], axis=1) + +Through the ``keys`` argument we can override the existing column names. + +.. ipython:: python + + pd.concat([s3, s4, s5], axis=1, keys=['red','blue','yellow']) + +Let's now consider a variation on the very first example presented: .. ipython:: python diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index e9d39e0441055..e9d7296dbeb0d 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -226,6 +226,30 @@ Other enhancements - ``DataFrame.apply`` will return a Series of dicts if the passed function returns a dict and ``reduce=True`` (:issue:`8735`). +- ``concat`` will now use existing Series names if provided (:issue:`10698`). + + .. 
ipython:: python + + foo = pd.Series([1,2], name='foo') + bar = pd.Series([1,2]) + baz = pd.Series([4,5]) + + Previous Behavior: + + .. code-block:: python + + In [1]: pd.concat([foo, bar, baz], 1) + Out[1]: + 0 1 2 + 0 1 1 4 + 1 2 2 5 + + New Behavior: + + .. ipython:: python + + pd.concat([foo, bar, baz], 1) + .. _whatsnew_0170.api: .. _whatsnew_0170.api_breaking: diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index a8b0d37b55bfe..0b5aad118e381 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -3,7 +3,7 @@ """ import numpy as np -from pandas.compat import range, long, lrange, lzip, zip, map, filter +from pandas.compat import range, lrange, lzip, zip, map, filter import pandas.compat as compat from pandas.core.categorical import Categorical from pandas.core.frame import DataFrame, _merge_doc @@ -15,7 +15,7 @@ from pandas.core.internals import (items_overlap_with_suffix, concatenate_block_managers) from pandas.util.decorators import Appender, Substitution -from pandas.core.common import ABCSeries +from pandas.core.common import ABCSeries, isnull import pandas.core.common as com @@ -912,8 +912,14 @@ def get_result(self): data = dict(zip(range(len(self.objs)), self.objs)) index, columns = self.new_axes tmpdf = DataFrame(data, index=index) - if columns is not None: - tmpdf.columns = columns + # checks if the columns variable already stores valid column names (because set via the 'keys' argument + # in the 'concat' function call). 
If that's not the case, use the series names as column names + if columns.equals(Index(np.arange(len(self.objs)))) and not self.ignore_index: + columns = np.array([ data[i].name for i in range(len(data)) ], dtype='object') + indexer = isnull(columns) + if indexer.any(): + columns[indexer] = np.arange(len(indexer[indexer])) + tmpdf.columns = columns return tmpdf.__finalize__(self, method='concat') # combine block managers diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index b7b7dd20a2045..bb359d386aae3 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -1879,6 +1879,24 @@ def test_concat_dataframe_keys_bug(self): self.assertEqual(list(result.columns), [('t1', 'value'), ('t2', 'value')]) + def test_concat_series_partial_columns_names(self): + # GH10698 + foo = pd.Series([1,2], name='foo') + bar = pd.Series([1,2]) + baz = pd.Series([4,5]) + + result = pd.concat([foo, bar, baz], axis=1) + expected = DataFrame({'foo' : [1,2], 0 : [1,2], 1 : [4,5]}, columns=['foo',0,1]) + tm.assert_frame_equal(result, expected) + + result = pd.concat([foo, bar, baz], axis=1, keys=['red','blue','yellow']) + expected = DataFrame({'red' : [1,2], 'blue' : [1,2], 'yellow' : [4,5]}, columns=['red','blue','yellow']) + tm.assert_frame_equal(result, expected) + + result = pd.concat([foo, bar, baz], axis=1, ignore_index=True) + expected = DataFrame({0 : [1,2], 1 : [1,2], 2 : [4,5]}) + tm.assert_frame_equal(result, expected) + def test_concat_dict(self): frames = {'foo': DataFrame(np.random.randn(4, 3)), 'bar': DataFrame(np.random.randn(4, 3)), @@ -2412,7 +2430,7 @@ def test_concat_series_axis1(self): s2.name = None result = concat([s, s2], axis=1) - self.assertTrue(np.array_equal(result.columns, lrange(2))) + self.assertTrue(np.array_equal(result.columns, Index(['A', 0], dtype='object'))) # must reindex, #2603 s = Series(randn(3), index=['c', 'a', 'b'], name='A')