Merge pull request pandas-dev#10723 from IamGianluca/issue_10698_fix

BUG: concat of Series w/o names pandas-dev#10698
stephenpascoe · Sep 2, 2015 · 207efc2 · 207efc2
2 parents 582eb17 + fa29a13
commit 207efc2
Show file tree

Hide file tree

Showing 4 changed files with 71 additions and 6 deletions.
diff --git a/doc/source/merging.rst b/doc/source/merging.rst
@@ -352,7 +352,24 @@ Passing ``ignore_index=True`` will drop all name references.
 More concatenating with group keys
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Let's consider a variation on the first example presented:
+A fairly common use of the ``keys`` argument is to override the column names when creating a new DataFrame based on existing Series.
+Notice how the default behaviour consists of letting the resulting DataFrame inherit the parent Series' name, when it exists.
+
+.. ipython:: python
+
+   s3 = pd.Series([0, 1, 2, 3], name='foo')
+   s4 = pd.Series([0, 1, 2, 3])
+   s5 = pd.Series([0, 1, 4, 5])
+
+   pd.concat([s3, s4, s5], axis=1)
+
+Through the ``keys`` argument we can override the existing column names.
+
+.. ipython:: python
+
+   pd.concat([s3, s4, s5], axis=1, keys=['red','blue','yellow'])
+
+Let's now consider a variation on the very first example presented:
 
 .. ipython:: python
 

diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -231,6 +231,30 @@ Other enhancements
 
 - ``DataFrame.apply`` will return a Series of dicts if the passed function returns a dict and ``reduce=True`` (:issue:`8735`).
 
+- ``concat`` will now use existing Series names if provided (:issue:`10698`).
+
+  .. ipython:: python
+
+     foo = pd.Series([1,2], name='foo')
+     bar = pd.Series([1,2])
+     baz = pd.Series([4,5])
+
+  Previous Behavior:
+
+  .. code-block:: python
+
+     In [1]: pd.concat([foo, bar, baz], 1)
+     Out[1]:
+           0  1  2
+        0  1  1  4
+        1  2  2  5
+
+  New Behavior:
+
+  .. ipython:: python
+
+    pd.concat([foo, bar, baz], 1)
+
 .. _whatsnew_0170.api:
 
 .. _whatsnew_0170.api_breaking:

diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
@@ -3,7 +3,7 @@
 """
 
 import numpy as np
-from pandas.compat import range, long, lrange, lzip, zip, map, filter
+from pandas.compat import range, lrange, lzip, zip, map, filter
 import pandas.compat as compat
 from pandas.core.categorical import Categorical
 from pandas.core.frame import DataFrame, _merge_doc
@@ -15,7 +15,7 @@
 from pandas.core.internals import (items_overlap_with_suffix,
                                    concatenate_block_managers)
 from pandas.util.decorators import Appender, Substitution
-from pandas.core.common import ABCSeries
+from pandas.core.common import ABCSeries, isnull
 
 import pandas.core.common as com
 
@@ -912,8 +912,14 @@ def get_result(self):
                 data = dict(zip(range(len(self.objs)), self.objs))
                 index, columns = self.new_axes
                 tmpdf = DataFrame(data, index=index)
-                if columns is not None:
-                    tmpdf.columns = columns
+                # Check whether `columns` already holds valid column names (i.e. set via the 'keys' argument
+                # of the 'concat' call). If not, use the Series names, numbering unnamed Series from 0.
+                if columns.equals(Index(np.arange(len(self.objs)))) and not self.ignore_index:
+                    columns = np.array([ data[i].name for i in range(len(data)) ], dtype='object')
+                    indexer = isnull(columns)
+                    if indexer.any():
+                        columns[indexer] = np.arange(len(indexer[indexer]))
+                tmpdf.columns = columns
                 return tmpdf.__finalize__(self, method='concat')
 
         # combine block managers

diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
@@ -1879,6 +1879,24 @@ def test_concat_dataframe_keys_bug(self):
         self.assertEqual(list(result.columns), [('t1', 'value'),
                                                 ('t2', 'value')])
 
+    def test_concat_series_partial_columns_names(self):
+        # GH10698
+        foo = pd.Series([1,2], name='foo')
+        bar = pd.Series([1,2])
+        baz = pd.Series([4,5])
+
+        result = pd.concat([foo, bar, baz], axis=1)
+        expected = DataFrame({'foo' : [1,2], 0 : [1,2], 1 : [4,5]}, columns=['foo',0,1])
+        tm.assert_frame_equal(result, expected)
+
+        result = pd.concat([foo, bar, baz], axis=1, keys=['red','blue','yellow'])
+        expected = DataFrame({'red' : [1,2], 'blue' : [1,2], 'yellow' : [4,5]}, columns=['red','blue','yellow'])
+        tm.assert_frame_equal(result, expected)
+
+        result = pd.concat([foo, bar, baz], axis=1, ignore_index=True)
+        expected = DataFrame({0 : [1,2], 1 : [1,2], 2 : [4,5]})
+        tm.assert_frame_equal(result, expected)
+
     def test_concat_dict(self):
         frames = {'foo': DataFrame(np.random.randn(4, 3)),
                   'bar': DataFrame(np.random.randn(4, 3)),
@@ -2412,7 +2430,7 @@ def test_concat_series_axis1(self):
 
         s2.name = None
         result = concat([s, s2], axis=1)
-        self.assertTrue(np.array_equal(result.columns, lrange(2)))
+        self.assertTrue(np.array_equal(result.columns, Index(['A', 0], dtype='object')))
 
         # must reindex, #2603
         s = Series(randn(3), index=['c', 'a', 'b'], name='A')