Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ Groupby/resample/rolling
Reshaping
^^^^^^^^^
- Bug in :func:`merge` where merging on a :class:`MultiIndex` containing ``NaN`` values mapped ``NaN`` keys to the last level value instead of ``NaN`` (:issue:`64492`)
- Bug in :meth:`DataFrame.pivot_table` with ``margins=True`` raising ``TypeError`` when ``values`` has an :class:`ExtensionDtype` that cannot hold ``NA`` (e.g. :class:`IntervalDtype` with an integer subtype) and no ``columns`` were specified (:issue:`55484`)
- Bug in :meth:`Index.union` where, unless ``sort=False`` was passed, the result could be unsorted when both inputs were monotonic increasing but disjoint (:issue:`54646`)
- In :func:`pivot_table`, when ``values`` is empty, the aggregation will be computed on a Series of all NA values (:issue:`46475`)
-
Expand Down
28 changes: 20 additions & 8 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,20 +502,29 @@ def _add_margins(
else:
row_margin[k] = grand_margin[k[0]]

# GH#55484 recover the correct dtype when row_margin was initialized as
# object (len(cols)==0 path); no-op for already-typed series.
row_margin = row_margin.infer_objects()

from pandas import DataFrame

row_margin = row_margin.reindex(result.columns, fill_value=fill_value)
margin_dummy = DataFrame(row_margin, columns=Index([key])).T

for dtype in set(result.dtypes):
if isinstance(dtype, ExtensionDtype):
# Can hold NA already
continue

cols = result.select_dtypes([dtype]).columns
margin_dummy[cols] = margin_dummy[cols].apply(
maybe_downcast_to_dtype, args=(dtype,)
)
if isinstance(dtype, ExtensionDtype):
# GH#55484 margin_dummy may be object-dtype when row_margin was
# initialized with dtype=object (len(cols)==0 path); cast back.
margin_dummy[cols] = margin_dummy[cols].astype(dtype)
elif (margin_dummy[cols].dtypes == object).all() and dtype != object:
# GH#55484 object-initialized row_margin can leave non-EA columns
# as object (mixed-values case); astype back to the target dtype.
margin_dummy[cols] = margin_dummy[cols].astype(dtype)
else:
margin_dummy[cols] = margin_dummy[cols].apply(
maybe_downcast_to_dtype, args=(dtype,)
)

row_names = result.index.names
result = concat([result, margin_dummy])
Expand Down Expand Up @@ -646,7 +655,10 @@ def _all_key(key):
new_order_names = [row_margin.index.names[i] for i in new_order_indices]
row_margin.index = row_margin.index.reorder_levels(new_order_names)
else:
row_margin = data._constructor_sliced(np.nan, index=result.columns)
# GH#55484 use object dtype so setitem works for grand-margin scalars
# whose dtype cannot hold NA (e.g. IntervalDtype with integer subtype);
# infer_objects is called in _add_margins after the values are set.
row_margin = data._constructor_sliced(index=result.columns, dtype=object)

return result, margin_keys, row_margin

Expand Down
63 changes: 63 additions & 0 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2444,6 +2444,69 @@ def test_pivot_table_nullable_margins(self):
)
tm.assert_frame_equal(result, expected)

def test_pivot_table_ea_dtype_cannot_hold_na_margins(self):
    # GH#55484 IntervalDtype with an integer subtype reports
    # _can_hold_na=False; pivot_table(margins=True) must still return
    # columns of that dtype.
    intervals = pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4])
    interval_dtype = intervals.dtype
    interval_values = pd.array(list(intervals), dtype=interval_dtype)
    assert not interval_values.dtype._can_hold_na

    def pick(*positions):
        # Interval array made of the intervals at the given positions.
        return pd.array(
            [intervals[pos] for pos in positions], dtype=interval_dtype
        )

    # Keyword arguments common to every pivot_table call below.
    shared_kwargs = {
        "index": "a",
        "aggfunc": lambda x: x.iloc[0],
        "margins": True,
    }
    margin_index = Index(["bar", "foo", "All"], name="a")

    frame = DataFrame(
        {
            "a": ["foo", "foo", "bar", "bar"],
            "b": ["one", "two", "one", "two"],
            "c": interval_values,
        }
    )

    # With ``columns``: both a row margin and a column margin are added.
    result = frame.pivot_table(columns="b", values="c", **shared_kwargs)
    expected = DataFrame(
        {
            "one": pick(2, 0, 0),
            "two": pick(3, 1, 1),
            "All": pick(2, 0, 0),
        },
        index=margin_index,
    )
    expected.columns.name = "b"
    tm.assert_frame_equal(result, expected)

    # Without ``columns`` (previously raised TypeError on row_margin init).
    result = frame.pivot_table(values="c", **shared_kwargs)
    expected = DataFrame({"c": pick(2, 0, 0)}, index=margin_index)
    tm.assert_frame_equal(result, expected)

    # Without ``columns``, mixing the no-NA extension column with a float one.
    frame["d"] = [1.0, 2.0, 3.0, 4.0]
    result = frame.pivot_table(values=["c", "d"], **shared_kwargs)
    expected = DataFrame(
        {
            "c": pick(2, 0, 0),
            "d": [3.0, 1.0, 1.0],
        },
        index=margin_index,
    )
    tm.assert_frame_equal(result, expected)

def test_pivot_table_sort_false_with_multiple_values(self):
df = DataFrame(
{
Expand Down
Loading