diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst index 3ef7906939259..41e9a956612ff 100644 --- a/doc/source/whatsnew/v3.1.0.rst +++ b/doc/source/whatsnew/v3.1.0.rst @@ -326,6 +326,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ - Bug in :func:`merge` where merging on a :class:`MultiIndex` containing ``NaN`` values mapped ``NaN`` keys to the last level value instead of ``NaN`` (:issue:`64492`) +- Bug in :meth:`DataFrame.pivot_table` with ``margins=True`` raising ``TypeError`` when ``values`` has an :class:`ExtensionDtype` that cannot hold ``NA`` (e.g. :class:`IntervalDtype` with an integer subtype) and no ``columns`` were specified (:issue:`55484`) - Bug in :meth:`Index.union` where the result could be unsorted when both inputs were monotonic increasing but disjoint, when ``sort`` was not ``False`` (:issue:`54646`) - In :func:`pivot_table`, when ``values`` is empty, the aggregation will be computed on a Series of all NA values (:issue:`46475`) - diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 4ab6193da7f33..dcad09ad96b7a 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -502,20 +502,29 @@ def _add_margins( else: row_margin[k] = grand_margin[k[0]] + # GH#55484 recover the correct dtype when row_margin was initialized as + # object (len(cols)==0 path); no-op for already-typed series. + row_margin = row_margin.infer_objects() + from pandas import DataFrame row_margin = row_margin.reindex(result.columns, fill_value=fill_value) margin_dummy = DataFrame(row_margin, columns=Index([key])).T for dtype in set(result.dtypes): - if isinstance(dtype, ExtensionDtype): - # Can hold NA already - continue - cols = result.select_dtypes([dtype]).columns - margin_dummy[cols] = margin_dummy[cols].apply( - maybe_downcast_to_dtype, args=(dtype,) - ) + if isinstance(dtype, ExtensionDtype): + # GH#55484 margin_dummy may be object-dtype when row_margin was + # initialized with dtype=object (len(cols)==0 path); cast back. + margin_dummy[cols] = margin_dummy[cols].astype(dtype) + elif (margin_dummy[cols].dtypes == object).all() and dtype != object: + # GH#55484 object-initialized row_margin can leave non-EA columns + # as object (mixed-values case); astype back to the target dtype. + margin_dummy[cols] = margin_dummy[cols].astype(dtype) + else: + margin_dummy[cols] = margin_dummy[cols].apply( + maybe_downcast_to_dtype, args=(dtype,) + ) row_names = result.index.names result = concat([result, margin_dummy]) @@ -646,7 +655,10 @@ def _all_key(key): new_order_names = [row_margin.index.names[i] for i in new_order_indices] row_margin.index = row_margin.index.reorder_levels(new_order_names) else: - row_margin = data._constructor_sliced(np.nan, index=result.columns) + # GH#55484 use object dtype so setitem works for grand-margin scalars + # whose dtype cannot hold NA (e.g. IntervalDtype with integer subtype); + # infer_objects is called in _add_margins after the values are set. + row_margin = data._constructor_sliced(index=result.columns, dtype=object) return result, margin_keys, row_margin diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 251b5e47cb82f..e5988196d5589 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2444,6 +2444,69 @@ def test_pivot_table_nullable_margins(self): ) tm.assert_frame_equal(result, expected) + def test_pivot_table_ea_dtype_cannot_hold_na_margins(self): + # GH#55484 IntervalDtype with integer subtype has _can_hold_na=False; + # pivot_table with margins=True must still preserve the dtype. + ii = pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4]) + iarr = pd.array(list(ii), dtype=ii.dtype) + assert not iarr.dtype._can_hold_na + + df = DataFrame( + { + "a": ["foo", "foo", "bar", "bar"], + "b": ["one", "two", "one", "two"], + "c": iarr, + } + ) + + result = df.pivot_table( + index="a", + columns="b", + values="c", + aggfunc=lambda x: x.iloc[0], + margins=True, + ) + expected = DataFrame( + { + "one": pd.array([ii[2], ii[0], ii[0]], dtype=ii.dtype), + "two": pd.array([ii[3], ii[1], ii[1]], dtype=ii.dtype), + "All": pd.array([ii[2], ii[0], ii[0]], dtype=ii.dtype), + }, + index=Index(["bar", "foo", "All"], name="a"), + ) + expected.columns.name = "b" + tm.assert_frame_equal(result, expected) + + # no-cols path (previously raised TypeError on row_margin init) + result = df.pivot_table( + index="a", + values="c", + aggfunc=lambda x: x.iloc[0], + margins=True, + ) + expected = DataFrame( + {"c": pd.array([ii[2], ii[0], ii[0]], dtype=ii.dtype)}, + index=Index(["bar", "foo", "All"], name="a"), + ) + tm.assert_frame_equal(result, expected) + + # no-cols with mixed EA-no-NA and numeric values + df["d"] = [1.0, 2.0, 3.0, 4.0] + result = df.pivot_table( + index="a", + values=["c", "d"], + aggfunc=lambda x: x.iloc[0], + margins=True, + ) + expected = DataFrame( + { + "c": pd.array([ii[2], ii[0], ii[0]], dtype=ii.dtype), + "d": [3.0, 1.0, 1.0], + }, + index=Index(["bar", "foo", "All"], name="a"), + ) + tm.assert_frame_equal(result, expected) + def test_pivot_table_sort_false_with_multiple_values(self): df = DataFrame( {