pandas-dev · tinezivic · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst
@@ -198,6 +198,7 @@ Bug fixes
 Categorical
 ^^^^^^^^^^^
 - Bug in :meth:`Categorical.__repr__` where the values and categories lines could exceed ``display.width`` (:issue:`12066`)
+- Bug in :meth:`Categorical.map` where mapping an unordered :class:`Categorical` kept the positional order of the original categories instead of sorting the mapped values, causing :meth:`DataFrame.sort_values` with ``key`` to ignore custom sort orders (:issue:`58153`)
 - Bug in :meth:`CategoricalIndex.union` and :meth:`CategoricalIndex.intersection` giving incorrect results when the two indexes have the same unordered categories in different orders (:issue:`55335`)
 - Bug in :meth:`Index.fillna` raising ``TypeError`` when filling with a tuple value (e.g. on object-dtype or :class:`CategoricalIndex` with tuple categories) (:issue:`37681`)
 -
@@ -244,6 +245,7 @@ Conversion
 ^^^^^^^^^^
 - Bug in :class:`DataFrame` constructor where ``NaT`` in a :class:`TimedeltaIndex` row was incorrectly inferred as ``datetime64`` instead of ``timedelta64`` (:issue:`23985`)
 - Bug in :class:`DataFrame` constructor where constructing from a list of uniform-dtype arrays (e.g. pyarrow, :class:`CategoricalDtype`, nullable dtypes) lost the dtype (:issue:`49593`)
+- Bug in :func:`pandas.array` raising ``ArrowTypeError`` when constructing an :class:`ArrowDtype` string array from a sequence containing ``np.nan`` (:issue:`64578`)
 - Bug in :func:`pd.array` silently converting NaN to a nonsensical integer when given float data containing NaN and a NumPy integer dtype (:issue:`41724`)
 - Fixed :func:`pandas.array` to preserve mask information when converting NumPy masked arrays, converting masked values to missing values (:issue:`63879`).
 - Fixed bug in :meth:`DataFrame.from_records` where ``exclude`` was ignored when ``data`` was an iterator and ``nrows=0`` (:issue:`63774`)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -650,7 +650,10 @@ def _box_pa_array(
             mask = None
             if is_nan_na():
                 try:
-                    arr_value = np.asarray(value)
+                    # GH#64578: use dtype=object to preserve scalar types
+                    # (e.g. np.nan stays float, not coerced to string 'nan')
+                    # so that isna() correctly identifies NA entries.
+                    arr_value = np.asarray(value, dtype=object)
                     if arr_value.ndim > 1:
                         # e.g. test_fixed_size_list we have list data.  ndim > 1
                         #  means there were no scalar (NA) entries.

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -3075,6 +3075,26 @@ def test_from_sequence_of_strings_boolean():
         ArrowExtensionArray._from_sequence_of_strings(strings, dtype=dtype)
 
 
+def test_arrow_array_constructor_with_nan():
+    # GH#64578: for an ArrowDtype, pd.array must turn np.nan into a null
+    # whatever the other elements of the input sequence are.
+    import pyarrow as pa
+
+    # (input holding np.nan, same input holding None, pyarrow type)
+    cases = [
+        (["a", np.nan], ["a", None], pa.string()),
+        (["a", np.nan], ["a", None], pa.large_string()),
+        # numeric input mixing ints and np.nan
+        ([1, np.nan, 3], [1, None, 3], pa.float64()),
+    ]
+
+    for with_nan, with_none, pa_type in cases:
+        # np.nan and None must produce the same extension array
+        result = pd.array(with_nan, dtype=ArrowDtype(pa_type))
+        expected = pd.array(with_none, dtype=ArrowDtype(pa_type))
+        tm.assert_extension_array_equal(result, expected)
+
+
 def test_concat_empty_arrow_backed_series(dtype):
     # GH#51734
     ser = pd.Series([], dtype=dtype)