diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst index 3ef7906939259..21beb00dccea0 100644 --- a/doc/source/whatsnew/v3.1.0.rst +++ b/doc/source/whatsnew/v3.1.0.rst @@ -145,6 +145,7 @@ Performance improvements - Performance improvement in :func:`merge` with ``how="cross"`` (:issue:`38082`) - Performance improvement in :func:`merge` with ``how="left"`` (:issue:`64370`) - Performance improvement in :func:`merge` with ``sort=False`` for single-key ``how="left"``/``how="right"`` joins when the opposite join key is sorted, unique, and range-like (:issue:`64146`) +- Performance improvement in :func:`merge` (non-inner), :meth:`DataFrame.unstack`, :meth:`DataFrame.pivot_table`, :func:`crosstab`, and :class:`GroupBy` aggregations by short-circuiting scans for missing-indexer sentinels - Performance improvement in :func:`read_csv` with ``engine="c"`` when reading from binary file-like objects (e.g. PyArrow S3 file handles) by avoiding unnecessary ``TextIOWrapper`` wrapping (:issue:`46823`) - Performance improvement in :func:`read_html` and the Python CSV parser when ``thousands`` is set, fixing catastrophic regex backtracking on cells with many comma-separated digit groups followed by non-numeric text (:issue:`52619`) - Performance improvement in :func:`read_sas` by reading page header fields directly in Cython instead of falling back to Python (:issue:`47339`) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index e50b301c34868..3c67fb89921dd 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -236,3 +236,7 @@ def is_sequence_range( sequence: np.ndarray, step: int, # np.ndarray[np.int64, ndim=1] ) -> bool: ... +def has_sentinel( + arr: np.ndarray, # const signed_int_t[:] + sentinel: int, +) -> bool: ... 
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 8df146041944e..373a40248202d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -59,6 +59,8 @@ from numpy cimport ( complex128_t, flatiter, float64_t, + int8_t, + int16_t, int32_t, int64_t, intp_t, @@ -483,17 +485,30 @@ def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray: def has_infs(const floating[:] arr) -> bool: cdef: Py_ssize_t i, n = len(arr) + Py_ssize_t n4 = n & ~3 # round down to multiple of 4 floating inf, neginf, val bint ret = False inf = np.inf neginf = -inf with nogil: - for i in range(n): - val = arr[i] - if val == inf or val == neginf: + for i in range(0, n4, 4): + # Bitwise | (not `or`) so all 4 lanes evaluate unconditionally, + # letting the compiler emit vectorized comparisons. + if ( + (arr[i] == inf) | (arr[i] == neginf) + | (arr[i + 1] == inf) | (arr[i + 1] == neginf) + | (arr[i + 2] == inf) | (arr[i + 2] == neginf) + | (arr[i + 3] == inf) | (arr[i + 3] == neginf) + ): ret = True break + if not ret: + for i in range(n4, n): + val = arr[i] + if val == inf or val == neginf: + ret = True + break return ret @@ -664,16 +679,27 @@ def is_range_indexer(const int6432_t[:] left, Py_ssize_t n) -> bool: """ cdef: Py_ssize_t i + Py_ssize_t n4 = n & ~3 bint ret = True if left.size != n: return False with nogil: - for i in range(n): - if left[i] != i: + for i in range(0, n4, 4): + if ( + (left[i] != i) + | (left[i + 1] != i + 1) + | (left[i + 2] != i + 2) + | (left[i + 3] != i + 3) + ): ret = False break + if ret: + for i in range(n4, n): + if left[i] != i: + ret = False + break return ret @@ -685,6 +711,7 @@ def is_sequence_range(const int6432_t[:] sequence, int64_t step) -> bool: """ cdef: Py_ssize_t i, n = len(sequence) + Py_ssize_t n4 = n & ~3 int6432_t first_element bint ret = True @@ -694,11 +721,64 @@ def is_sequence_range(const int6432_t[:] sequence, int64_t step) -> bool: return True first_element = sequence[0] + # sequence[0] == first_element by 
construction, so the i=0 lane of the + # unrolled loop is trivially true — skipping the explicit head loop + # costs one redundant compare on the first iteration. with nogil: - for i in range(1, n): - if sequence[i] != first_element + i * step: + for i in range(0, n4, 4): + if ( + (sequence[i] != first_element + i * step) + | (sequence[i + 1] != first_element + (i + 1) * step) + | (sequence[i + 2] != first_element + (i + 2) * step) + | (sequence[i + 3] != first_element + (i + 3) * step) + ): ret = False break + if ret: + for i in range(n4, n): + if sequence[i] != first_element + i * step: + ret = False + break + return ret + + +ctypedef fused signed_int_t: + int8_t + int16_t + int32_t + int64_t + + +@cython.wraparound(False) +@cython.boundscheck(False) +def has_sentinel(const signed_int_t[:] arr, signed_int_t sentinel) -> bool: + """ + Faster equivalent to `(arr == sentinel).any()` for integer indexers. + """ + cdef: + Py_ssize_t i, n = arr.shape[0] + Py_ssize_t n8 = n & ~7 + bint ret = False + + with nogil: + for i in range(0, n8, 8): + if ( + (arr[i] == sentinel) + | (arr[i + 1] == sentinel) + | (arr[i + 2] == sentinel) + | (arr[i + 3] == sentinel) + | (arr[i + 4] == sentinel) + | (arr[i + 5] == sentinel) + | (arr[i + 6] == sentinel) + | (arr[i + 7] == sentinel) + ): + ret = True + break + if not ret: + for i in range(n8, n): + if arr[i] == sentinel: + ret = True + break return ret diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 859923a860f7d..fa0aa86458749 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4648,7 +4648,7 @@ def igetitem(obj, i: int): else: ilocs = self.columns.get_indexer_non_unique(key)[0] - if (ilocs < 0).any(): + if lib.has_sentinel(ilocs, -1): # key entries not in self.columns raise NotImplementedError diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a50763a5efe50..6c88f5daca7ed 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -4144,7 +4144,7 @@ def 
_to_bool_indexer(indexer) -> npt.NDArray[np.bool_]: if missing_mask.sum() > na_count: raise KeyError(k) from None # NaN is in k but must also be present in the data - if not (level_codes == -1).any(): + if not lib.has_sentinel(level_codes, -1): raise KeyError(k) from None elif missing_mask.any(): raise KeyError(k) from None diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6737b03475469..2b8e71793fb9c 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1288,7 +1288,7 @@ def _maybe_add_join_keys( left_has_missing = ( False if left_indexer is None - else (left_indexer == -1).any() + else lib.has_sentinel(left_indexer, -1) ) if left_has_missing: @@ -1302,7 +1302,7 @@ def _maybe_add_join_keys( right_has_missing = ( False if right_indexer is None - else (right_indexer == -1).any() + else lib.has_sentinel(right_indexer, -1) ) if right_has_missing: diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index e7cabe794445a..c5aa47e080f4e 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -459,7 +459,7 @@ def new_index(self) -> MultiIndex | Index: # construct the new index if len(self.new_index_levels) == 1: level, level_codes = self.new_index_levels[0], result_codes[0] - if (level_codes == -1).any(): + if lib.has_sentinel(level_codes, -1): level = level.insert(len(level), level._na_value) return level.take(level_codes).rename(self.new_index_names[0]) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 21309ba4734fd..529ef81bb20c3 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -164,7 +164,7 @@ def _int64_cut_off(shape) -> int: def maybe_lift(lab, size: int) -> tuple[np.ndarray, int]: # promote nan values (assigned -1 label in lab array) # so that all output values are non-negative - return (lab + 1, size + 1) if (lab == -1).any() else (lab, size) + return (lab + 1, size + 1) if lib.has_sentinel(lab, -1) else (lab, size) labels 
= [ensure_int64(x) for x in labels] lshape = list(shape) @@ -282,7 +282,7 @@ def decons_obs_group_ids( If nulls are excluded; i.e. -1 labels are passed through. """ if not xnull: - lift = np.fromiter(((a == -1).any() for a in labels), dtype=np.intp) + lift = np.fromiter((lib.has_sentinel(a, -1) for a in labels), dtype=np.intp) arr_shape = np.asarray(shape, dtype=np.intp) + lift shape = tuple(arr_shape)