rapidsai · galipremsagar · May 1, 2026 · May 6, 2026 · May 6, 2026 · mroeschke
@@ -98,7 +98,14 @@ def _return_or_inplace(  # type: ignore[misc]
                         index=self._parent.index,  # type: ignore[union-attr]
                         attrs=self._parent.attrs,  # type: ignore[union-attr]
                     )
-                    if len(table) == 0:
+                    keys = (
+                        tuple(table.keys()) if hasattr(table, "keys") else ()
-                        tuple(table.keys()) if hasattr(table, "keys") else ()
+                        tuple(table.keys()) if isinstance(table, dict) else ()
-                        tuple(table.keys()) if hasattr(table, "keys") else ()
+                        tuple(table.keys()) if isinstance(table, dict) else ()
+                    )
+                    if len(table) == 0 or (
+                        keys
+                        and all(isinstance(k, int) for k in keys)
+                        and tuple(keys) == tuple(range(len(keys)))
-                        and tuple(keys) == tuple(range(len(keys)))
+                        and keys == tuple(range(len(keys)))
-                        and tuple(keys) == tuple(range(len(keys)))
+                        and keys == tuple(range(len(keys)))
+                    ):
                         df._data.rangeindex = True
                     return df
             elif isinstance(self._parent, cudf.Series):

@@ -2126,6 +2126,15 @@ def _concat(
         # Reassign index and column names
         if objs[0]._data.multiindex:
             out._set_columns_like(objs[0]._data)
+        elif (
+            all(obj._data.rangeindex for obj in objs)
+            and all(
+                tuple(obj._column_names) == tuple(range(obj._num_columns))
+                for obj in objs
+            )
+            and tuple(names) == tuple(range(len(names)))
+        ):
+            out.columns = cudf.RangeIndex(len(names))
         else:
             out.columns = names
         if not ignore_index:
@@ -2419,6 +2428,9 @@ def _fill_same_ca_attributes(
             else:
                 raise ValueError("other must be a DataFrame or Series.")
 
+            if isinstance(column_names_list, pd.MultiIndex):
+                ca_attributes["multiindex"] = True
+                ca_attributes["level_names"] = tuple(column_names_list.names)
             sorted_dict = {key: operands[key] for key in column_names_list}
             return sorted_dict, index, ca_attributes
         return operands, index, ca_attributes
@@ -4805,6 +4817,23 @@ def join(
         df.index.name = (
             None if self.index.name != other.index.name else self.index.name
         )
+
+        # Preserve a CategoricalIndex columns axis when both inputs share the
+        # same categorical dtype on their column labels (matches pandas).
+        self_pd_cols = self._data.to_pandas_index
+        other_pd_cols = other._data.to_pandas_index
+        if (
+            isinstance(self_pd_cols, pd.CategoricalIndex)
+            and isinstance(other_pd_cols, pd.CategoricalIndex)
+            and self_pd_cols.dtype == other_pd_cols.dtype
+        ):
+            df.columns = pd.CategoricalIndex(
+                list(self_pd_cols) + list(other_pd_cols),
+                dtype=self_pd_cols.dtype,
+                name=self_pd_cols.name
+                if self_pd_cols.name == other_pd_cols.name
+                else None,
+            )
-            df.columns = pd.CategoricalIndex(
-                list(self_pd_cols) + list(other_pd_cols),
-                dtype=self_pd_cols.dtype,
-                name=self_pd_cols.name
-                if self_pd_cols.name == other_pd_cols.name
-                else None,
-            )
+            df.columns = self_pd_cols.append(other_pd_cols)
-            df.columns = pd.CategoricalIndex(
-                list(self_pd_cols) + list(other_pd_cols),
-                dtype=self_pd_cols.dtype,
-                name=self_pd_cols.name
-                if self_pd_cols.name == other_pd_cols.name
-                else None,
-            )
+            df.columns = self_pd_cols.append(other_pd_cols)
         return df
 
     @_performance_tracking
@@ -6369,7 +6398,15 @@ def quantile(
                     if len(res) == 0:
                         res = column_empty(row_count=len(qs), dtype=ser.dtype)
                     result[k] = res
-            result = DataFrame._from_data(result, attrs=self.attrs)
+            result_ca = ColumnAccessor(
+                result,
+                multiindex=data_df._data.multiindex,
+                level_names=data_df._data.level_names,
+                rangeindex=data_df._data.rangeindex,
+                label_dtype=data_df._data.label_dtype,
+                verify=False,
+            )
+            result = DataFrame._from_data(result_ca, attrs=self.attrs)
 
             if q_is_number and numeric_only:
                 result = result.fillna(np.nan).iloc[0]
@@ -7233,7 +7270,12 @@ def cudf_dtype_from_pydata_dtype(dtype):
             for label, dtype in self._dtypes
             if cudf_dtype_from_pydata_dtype(dtype) in inclusion
         ]
-        return self.loc[:, to_select]
+        result = self.loc[:, to_select]
+        if not to_select and self._data.rangeindex:
+            # Preserve RangeIndex columns through an empty selection so that
+            # downstream operations match pandas' column metadata.
+            result._data.rangeindex = True
+        return result
 
     @ioutils.doc_to_parquet()
     def to_parquet(

@@ -1111,12 +1111,27 @@ def agg(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
                 if cast_dtype is not None:
                     result_col = result_col.astype(cast_dtype)
                 data[key] = result_col
-        data = ColumnAccessor(data, multiindex=multilevel)
+        from cudf.core.dataframe import DataFrame
+
+        # Preserve the column axis label-dtype/level_names from the source
+        # DataFrame so that aggregations such as ``nunique`` keep the column
+        # axis name (matching pandas behavior).
+        if (
+            not multilevel
+            and isinstance(self.obj, DataFrame)
-            and isinstance(self.obj, DataFrame)
+            and self.obj.ndim == 2
-            and isinstance(self.obj, DataFrame)
+            and self.obj.ndim == 2
+            and self.obj._data.level_names != (None,)
+        ):
+            data = ColumnAccessor(
+                data,
+                multiindex=False,
+                level_names=self.obj._data.level_names,
+                label_dtype=self.obj._data.label_dtype,
+            )
+        else:
+            data = ColumnAccessor(data, multiindex=multilevel)
         if not multilevel:
             data = data.rename_levels({np.nan: None}, level=0)
 
-        from cudf.core.dataframe import DataFrame
-
         result = DataFrame._from_data(data, index=result_index)
 
         if self._sort:
@@ -2753,6 +2768,8 @@ def _scan_fill(
     ) -> DataFrameOrSeries:
         """Internal implementation for `ffill` and `bfill`"""
         values = self.grouping.values
+        from cudf.core.dataframe import DataFrame
+
         result = self.obj._from_data(
             dict(
                 zip(
@@ -2762,6 +2779,33 @@ def _scan_fill(
                 )
             )
         )
+        # Pandas' groupby.ffill/bfill builds the result columns via a ``take``
+        # on the input columns, which converts integer-valued column labels
+        # to object dtype. Reproduce that here so column metadata matches.
+        if (
+            isinstance(result, DataFrame)
+            and isinstance(self.obj, DataFrame)
+            and result._num_columns < self.obj._num_columns
+        ):
+            source_pd_cols = self.obj._data.to_pandas_index
+            if (
+                source_pd_cols.dtype.kind in {"i", "u"}
+                or source_pd_cols.dtype == object
+            ):
+                try:
+                    positions = [
+                        source_pd_cols.get_loc(c) for c in result._column_names
+                    ]
+                except (KeyError, TypeError):
+                    positions = None
+                if positions is not None:
+                    taken = source_pd_cols.take(positions)
+                    if (
+                        not isinstance(taken, pd.MultiIndex)
+                        and taken.dtype != object
+                    ):
+                        taken = taken.astype(object)
+                    result.columns = taken
         return self._mimic_pandas_order(result)
 
     def ffill(self, limit: int | None = None):

@@ -4013,11 +4013,13 @@ def _reindex(
 
         index = index if index is not None else df.index
 
+        label_dtype = None
         if column_names is None:
             names = list(df._column_names)
             level_names = self._data.level_names
             multiindex = self._data.multiindex
             rangeindex = self._data.rangeindex
+            label_dtype = self._data.label_dtype
         elif isinstance(column_names, (pd.Index, cudf.Index)):
             if isinstance(column_names, (pd.MultiIndex, cudf.MultiIndex)):
                 multiindex = True
@@ -4034,6 +4036,12 @@ def _reindex(
                 rangeindex = isinstance(
                     column_names, (pd.RangeIndex, cudf.RangeIndex)
                 )
+                if not rangeindex:
+                    label_dtype = (
+                        column_names.dtype
+                        if isinstance(column_names, pd.Index)
+                        else column_names.to_pandas().dtype
+                    )
             level_names = tuple(column_names.names)
         else:
             names = column_names
@@ -4066,6 +4074,7 @@ def _reindex(
                 multiindex=multiindex,
                 level_names=level_names,
                 rangeindex=rangeindex,
+                label_dtype=label_dtype,
             ),
             index=index,
             attrs=self.attrs,

@@ -129,7 +129,10 @@ def _normalize_series_and_dataframe(
             name = obj.name
             if name is None:
                 if axis == 0:
-                    name = 0
+                    # Preserve "unnamed" semantics so the resulting frame has
+                    # a RangeIndex columns object (matching pandas).
+                    objs[idx] = obj.to_frame()
+                    continue
                 else:
                     name = sr_name
                     sr_name += 1
@@ -1063,12 +1066,39 @@ def pivot(
                 index_data = index_data.get_level_values(0)
         else:
             index_data = cudf.Index(index_data)
+        # An entirely empty input pivots to an empty result. Pandas uses the
+        # default ``object`` dtype for the resulting index axis in that case;
+        # mirror this so index metadata (dtype/inferred_type) matches.
+        if (
+            len(data) == 0
+            and not isinstance(index_data, cudf.MultiIndex)
+            and isinstance(index_data.dtype, pd.StringDtype)
+        ):
+            index_data = cudf.Index(
+                pd.Index([], name=index_data.name, dtype=object)
+            )
 
     column_data = data.loc[:, columns]
+    # When `columns` is a scalar but the source DataFrame has a MultiIndex on
+    # the row axis, ``loc`` may return a 2-D selection in cuDF. Treat the
+    # selection as 1-D so we end up with a flat Index of column labels.
+    if is_scalar(columns) and column_data.ndim == 2:
+        column_data = column_data.iloc[:, 0]
     if column_data.ndim == 2:
         column_data = cudf.MultiIndex.from_frame(column_data)
     else:
         column_data = cudf.Index(column_data)
+    # An entirely empty input pivots to an empty result. Pandas reports the
+    # default ``object`` dtype for the resulting columns axis in that case;
+    # mirror this so column metadata (dtype/inferred_type) matches.
+    if (
+        len(data) == 0
+        and not isinstance(column_data, cudf.MultiIndex)
+        and isinstance(column_data.dtype, pd.StringDtype)
+    ):
+        column_data = cudf.Index(
+            pd.Index([], name=column_data.name, dtype=object)
+        )
 
     # Create a DataFrame composed of columns from both
     # columns and index

@@ -254,11 +254,22 @@ def tolist(self) -> None:
     def _to_frame(self, name: Hashable, index: Index | None) -> DataFrame:
         """Helper function for Series.to_frame, Index.to_frame"""
 
+        unnamed_default = False
+        col_name: Hashable
         if name is no_default:
-            col_name = 0 if self.name is None else self.name
+            if self.name is None:
+                col_name = 0
+                unnamed_default = True
+            else:
+                col_name = self.name
         else:
             col_name = name
-        ca = ColumnAccessor({col_name: self._column}, verify=False)
+        ca = ColumnAccessor(
+            {col_name: self._column},
+            multiindex=isinstance(col_name, tuple),
+            rangeindex=unnamed_default,
+            verify=False,
+        )
         # TODO: Avoid accessing DataFrame from the top level namespace
         return cudf.DataFrame._from_data(ca, index=index)