diff --git a/docs/detection/utils.md b/docs/detection/utils.md index 5f1902b79c..c503b6149f 100644 --- a/docs/detection/utils.md +++ b/docs/detection/utils.md @@ -71,6 +71,12 @@ status: new :::supervision.detection.utils.scale_boxes +
def resolve_letterbox(
    xyxy: npt.NDArray[np.float64],
    letterbox_wh: Tuple[int, int],
    resolution_wh: Tuple[int, int],
) -> npt.NDArray[np.float64]:
    """
    Map bounding boxes from letterboxed-image coordinates back to the
    coordinates of the image at its required (pre-letterbox) resolution.

    The function recomputes the size the image occupies inside the letterbox
    (aspect ratio preserved, fractional sizes truncated with `int`), removes
    the resulting left/top padding from the boxes and rescales them.

    Args:
        xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing
            the bounding boxes coordinates in format `[x1, y1, x2, y2]`,
            expressed in the letterboxed image. The input is never modified.
        letterbox_wh (Tuple[int, int]): The resolution of the letterboxed
            image as `(width, height)`.
        resolution_wh (Tuple[int, int]): The target resolution as
            `(width, height)`.

    Returns:
        npt.NDArray[np.float64]: A new array of shape `(n, 4)` with the boxes
            resolved to the target resolution. Floating-point inputs keep
            their dtype; any other dtype is promoted to `float64`.

    Example:
        ```python
        import numpy as np

        xyxy = np.array([[100.0, 200.0, 300.0, 400.0]])
        resolve_letterbox(
            xyxy=xyxy, letterbox_wh=(640, 640), resolution_wh=(1280, 640)
        )
        # array([[200.,  80., 600., 480.]])
        ```
    """
    input_w, input_h = resolution_wh
    letterbox_w, letterbox_h = letterbox_wh

    # Size of the image content inside the letterbox: the limiting dimension
    # fills the letterbox, the other one is derived from the aspect ratio.
    target_ratio = letterbox_w / letterbox_h
    image_ratio = input_w / input_h
    if image_ratio >= target_ratio:
        width_new = letterbox_w
        height_new = int(letterbox_w / image_ratio)
    else:
        height_new = letterbox_h
        width_new = int(letterbox_h * image_ratio)

    # NOTE(review): one uniform factor, derived from the width, is applied to
    # both axes. Because of the `int` truncation above it can differ slightly
    # from `input_h / height_new` - confirm this matches the forward transform.
    scale = input_w / width_new

    padding_top = (letterbox_h - height_new) // 2
    padding_left = (letterbox_w - width_new) // 2

    # Work on a floating-point copy: in-place multiplication by a float scale
    # fails on integer arrays, and the caller's array must stay untouched.
    boxes = np.asarray(xyxy)
    if np.issubdtype(boxes.dtype, np.floating):
        boxes = boxes.astype(boxes.dtype)
    else:
        boxes = boxes.astype(np.float64)

    boxes[:, [0, 2]] -= padding_left
    boxes[:, [1, 3]] -= padding_top
    boxes[:, :4] *= scale

    return boxes