Skip to content

Commit

Permalink
Merge pull request pandas-dev#10723 from IamGianluca/issue_10698_fix
Browse files Browse the repository at this point in the history
BUG: concat of Series w/o names pandas-dev#10698
  • Loading branch information
jreback committed Sep 2, 2015
2 parents 582eb17 + fa29a13 commit 207efc2
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 6 deletions.
19 changes: 18 additions & 1 deletion doc/source/merging.rst
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,24 @@ Passing ``ignore_index=True`` will drop all name references.
More concatenating with group keys
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Let's consider a variation on the first example presented:
A fairly common use of the ``keys`` argument is to override the column names when creating a new DataFrame based on existing Series.
Notice that, by default, the resulting DataFrame inherits the names of the parent Series, when these exist.

.. ipython:: python
s3 = pd.Series([0, 1, 2, 3], name='foo')
s4 = pd.Series([0, 1, 2, 3])
s5 = pd.Series([0, 1, 4, 5])
pd.concat([s3, s4, s5], axis=1)
Through the ``keys`` argument we can override the existing column names.

.. ipython:: python
pd.concat([s3, s4, s5], axis=1, keys=['red','blue','yellow'])
Let's now consider a variation on the very first example presented:

.. ipython:: python
Expand Down
24 changes: 24 additions & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,30 @@ Other enhancements

- ``DataFrame.apply`` will return a Series of dicts if the passed function returns a dict and ``reduce=True`` (:issue:`8735`).

- ``concat`` will now use existing Series names if provided (:issue:`10698`).

.. ipython:: python

foo = pd.Series([1,2], name='foo')
bar = pd.Series([1,2])
baz = pd.Series([4,5])

Previous Behavior:

.. code-block:: python

In [1]: pd.concat([foo, bar, baz], 1)
Out[1]:
0 1 2
0 1 1 4
1 2 2 5

New Behavior:

.. ipython:: python

pd.concat([foo, bar, baz], 1)

.. _whatsnew_0170.api:

.. _whatsnew_0170.api_breaking:
Expand Down
14 changes: 10 additions & 4 deletions pandas/tools/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

import numpy as np
from pandas.compat import range, long, lrange, lzip, zip, map, filter
from pandas.compat import range, lrange, lzip, zip, map, filter
import pandas.compat as compat
from pandas.core.categorical import Categorical
from pandas.core.frame import DataFrame, _merge_doc
Expand All @@ -15,7 +15,7 @@
from pandas.core.internals import (items_overlap_with_suffix,
concatenate_block_managers)
from pandas.util.decorators import Appender, Substitution
from pandas.core.common import ABCSeries
from pandas.core.common import ABCSeries, isnull

import pandas.core.common as com

Expand Down Expand Up @@ -912,8 +912,14 @@ def get_result(self):
data = dict(zip(range(len(self.objs)), self.objs))
index, columns = self.new_axes
tmpdf = DataFrame(data, index=index)
if columns is not None:
tmpdf.columns = columns
# Check whether ``columns`` already holds explicit column names (i.e. they were set via the 'keys' argument
# of the 'concat' function call). If that's not the case, use the Series names as column names.
if columns.equals(Index(np.arange(len(self.objs)))) and not self.ignore_index:
columns = np.array([ data[i].name for i in range(len(data)) ], dtype='object')
indexer = isnull(columns)
if indexer.any():
columns[indexer] = np.arange(len(indexer[indexer]))
tmpdf.columns = columns
return tmpdf.__finalize__(self, method='concat')

# combine block managers
Expand Down
20 changes: 19 additions & 1 deletion pandas/tools/tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1879,6 +1879,24 @@ def test_concat_dataframe_keys_bug(self):
self.assertEqual(list(result.columns), [('t1', 'value'),
('t2', 'value')])

def test_concat_series_partial_columns_names(self):
# GH10698
# Column-wise concat of Series where only some of the inputs carry a name.
# One named Series ('foo') and two unnamed ones are concatenated three ways.
foo = pd.Series([1,2], name='foo')
bar = pd.Series([1,2])
baz = pd.Series([4,5])

# Default: the named Series keeps its name as the column label, while the
# unnamed Series are expected to be labelled 0 and 1.
result = pd.concat([foo, bar, baz], axis=1)
expected = DataFrame({'foo' : [1,2], 0 : [1,2], 1 : [4,5]}, columns=['foo',0,1])
tm.assert_frame_equal(result, expected)

# Explicit ``keys`` take precedence over the Series names.
result = pd.concat([foo, bar, baz], axis=1, keys=['red','blue','yellow'])
expected = DataFrame({'red' : [1,2], 'blue' : [1,2], 'yellow' : [4,5]}, columns=['red','blue','yellow'])
tm.assert_frame_equal(result, expected)

# ``ignore_index=True`` drops the Series names; columns are expected to be 0, 1, 2.
result = pd.concat([foo, bar, baz], axis=1, ignore_index=True)
expected = DataFrame({0 : [1,2], 1 : [1,2], 2 : [4,5]})
tm.assert_frame_equal(result, expected)

def test_concat_dict(self):
frames = {'foo': DataFrame(np.random.randn(4, 3)),
'bar': DataFrame(np.random.randn(4, 3)),
Expand Down Expand Up @@ -2412,7 +2430,7 @@ def test_concat_series_axis1(self):

s2.name = None
result = concat([s, s2], axis=1)
self.assertTrue(np.array_equal(result.columns, lrange(2)))
self.assertTrue(np.array_equal(result.columns, Index(['A', 0], dtype='object')))

# must reindex, #2603
s = Series(randn(3), index=['c', 'a', 'b'], name='A')
Expand Down

0 comments on commit 207efc2

Please sign in to comment.