Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions common/src/main/java/org/apache/sedona/common/Predicates.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.sedona.common;

import org.apache.sedona.common.geometryObjects.Box2D;
import org.apache.sedona.common.sphere.Spheroid;
import org.locationtech.jts.geom.*;
import org.locationtech.jts.operation.relate.RelateOp;
Expand All @@ -27,6 +28,55 @@ public static boolean contains(Geometry leftGeometry, Geometry rightGeometry) {
return leftGeometry.contains(rightGeometry);
}

/**
* Tests whether two planar boxes intersect, treating every interval as closed: returns true when
* {@code a} and {@code b} overlap on <em>both</em> the X and Y axes (matches PostGIS {@code &&} on
* box2d). Boxes that only touch along an edge or at a corner count as intersecting.
*
* <p>Both arguments must have ordered bounds ({@code xmin <= xmax} and {@code ymin <= ymax}).
* Sedona's Box2D type allows inverted bounds ({@code xmin > xmax}); that ordering is reserved for
* future antimeridian-wraparound semantics on geography bboxes (cf. sedona-db's {@code
* WraparoundInterval}). Until those semantics ship, planar predicates throw on inverted input
* rather than silently returning misleading results.
*
* <p>Null handling differs by entry point: the SQL wrappers return NULL when either argument is
* NULL, whereas this Java method throws if either argument is null.
*
* @param a the first box; must be non-null with ordered bounds
* @param b the second box; must be non-null with ordered bounds
* @return {@code true} if the boxes overlap on both axes (touching counts), {@code false}
*     otherwise
* @throws IllegalArgumentException if either box has inverted bounds
* @throws NullPointerException if either box is {@code null}
*/
public static boolean boxIntersects(Box2D a, Box2D b) {
// Validate both operands up front so an inverted box is reported rather than compared.
requireOrderedPlanarBox(a, "a");
requireOrderedPlanarBox(b, "b");
// Negated separation test: the boxes intersect unless one lies strictly beyond the other on
// some axis. The strict comparisons are what make touching edges and corners intersect.
return !(a.getXMax() < b.getXMin()
|| a.getXMin() > b.getXMax()
|| a.getYMax() < b.getYMin()
|| a.getYMin() > b.getYMax());
Comment on lines +43 to +49
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in 6726b1b — went with option (a). boxIntersects/boxContains now call requireOrderedPlanarBox and throw IllegalArgumentException with a clear message on inverted bounds. Defining wraparound semantics on a planar predicate without the geography work in #2929 would be premature; failing fast is the honest contract for now.

}

/**
* Tests whether {@code a} fully contains {@code b} on <em>both</em> the X and Y axes, treating
* every interval as closed (matches PostGIS {@code ~} on box2d). Equal boxes contain each other,
* and a box lying on the boundary of {@code a} is still contained.
*
* <p>Same ordered-bound contract as {@link #boxIntersects(Box2D, Box2D)}: inverted bounds throw
* because planar containment with inverted intervals has no defined meaning until antimeridian
* wraparound semantics ship.
*
* @param a the candidate outer box; must be non-null with ordered bounds
* @param b the candidate inner box; must be non-null with ordered bounds
* @return {@code true} if every bound of {@code b} lies within the matching bound of {@code a}
*     (inclusive), {@code false} otherwise
* @throws IllegalArgumentException if either box has inverted bounds
* @throws NullPointerException if either box is {@code null}
*/
public static boolean boxContains(Box2D a, Box2D b) {
// Validate both operands up front so an inverted box is reported rather than compared.
requireOrderedPlanarBox(a, "a");
requireOrderedPlanarBox(b, "b");
// Non-strict comparisons on all four bounds make the containment inclusive.
return a.getXMin() <= b.getXMin()
&& a.getYMin() <= b.getYMin()
&& a.getXMax() >= b.getXMax()
&& a.getYMax() >= b.getYMax();
Comment on lines +60 to +66
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same fix in 6726b1bboxContains now validates ordered bounds before evaluating containment.

}

/**
 * Precondition check shared by the planar Box2D predicates: rejects a missing box and a box whose
 * minimum exceeds its maximum on either axis.
 *
 * @param box the box to validate
 * @param argName the caller-side parameter name, embedded in the failure message so the
 *     offending argument can be identified
 * @throws NullPointerException if {@code box} is {@code null}
 * @throws IllegalArgumentException if {@code xmin > xmax} or {@code ymin > ymax}
 */
private static void requireOrderedPlanarBox(Box2D box, String argName) {
  // Fail with a message that names the argument instead of relying on an incidental
  // dereference below; the exception type stays NullPointerException.
  if (box == null) {
    throw new NullPointerException("Box2D argument '" + argName + "' must not be null.");
  }
  // NOTE(review): if the getters return primitive doubles, a NaN bound makes both comparisons
  // false and therefore passes this check — confirm that is the intended treatment of NaN.
  if (box.getXMin() > box.getXMax() || box.getYMin() > box.getYMax()) {
    throw new IllegalArgumentException(
        "Box2D argument '"
            + argName
            + "' has inverted bounds (xmin > xmax or ymin > ymax). Planar Box2D predicates "
            + "require ordered intervals; inverted bounds are reserved for future antimeridian "
            + "wraparound semantics.");
  }
}

/**
 * Reports whether two geometries intersect by delegating to {@code
 * leftGeometry.intersects(rightGeometry)}.
 *
 * @param leftGeometry the geometry the test is invoked on
 * @param rightGeometry the geometry passed as the argument of the test
 * @return the result of the delegated {@code intersects} call
 */
public static boolean intersects(Geometry leftGeometry, Geometry rightGeometry) {
  final boolean intersecting = leftGeometry.intersects(rightGeometry);
  return intersecting;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static org.apache.sedona.common.Functions.crossesDateLine;
import static org.junit.Assert.*;

import org.apache.sedona.common.geometryObjects.Box2D;
import org.junit.Test;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.Geometry;
Expand All @@ -32,6 +33,55 @@ public class PredicatesTest extends TestBase {

private static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory();

/** Exercises {@code Predicates.boxIntersects} against a fixed reference box spanning 0..5. */
@Test
public void testBoxIntersects() {
  final Box2D reference = new Box2D(0.0, 0.0, 5.0, 5.0);

  // Intervals are closed, so nested, partially overlapping, edge-sharing and corner-sharing
  // boxes must all be reported as intersecting.
  final Box2D nested = new Box2D(1.0, 1.0, 2.0, 2.0);
  final Box2D partial = new Box2D(3.0, 3.0, 7.0, 7.0);
  final Box2D sharesEdge = new Box2D(5.0, 0.0, 10.0, 5.0);
  final Box2D sharesCorner = new Box2D(5.0, 5.0, 10.0, 10.0);
  assertTrue(Predicates.boxIntersects(reference, nested));
  assertTrue(Predicates.boxIntersects(reference, partial));
  assertTrue(Predicates.boxIntersects(reference, sharesEdge));
  assertTrue(Predicates.boxIntersects(reference, sharesCorner));

  // A gap on a single axis is enough to make the boxes disjoint.
  final Box2D separatedOnX = new Box2D(6.0, 0.0, 10.0, 5.0);
  final Box2D separatedOnY = new Box2D(0.0, 6.0, 5.0, 10.0);
  assertFalse(Predicates.boxIntersects(reference, separatedOnX));
  assertFalse(Predicates.boxIntersects(reference, separatedOnY));
}
Comment on lines +36 to +52
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added testBoxPredicatesRejectInvertedBounds in 6726b1b covering both inverted-X (the antimeridian-style case) and inverted-Y, asserting the documented IllegalArgumentException is thrown.


/** Exercises {@code Predicates.boxContains} against a fixed container box spanning 0..10. */
@Test
public void testBoxContains() {
  final Box2D container = new Box2D(0.0, 0.0, 10.0, 10.0);

  // Strictly interior box.
  final Box2D interior = new Box2D(2.0, 2.0, 5.0, 5.0);
  assertTrue(Predicates.boxContains(container, interior));

  // Containment is inclusive: an identical box and a box flush with a corner both qualify.
  final Box2D identical = new Box2D(0.0, 0.0, 10.0, 10.0);
  final Box2D flushWithCorner = new Box2D(0.0, 0.0, 1.0, 1.0);
  assertTrue(Predicates.boxContains(container, identical));
  assertTrue(Predicates.boxContains(container, flushWithCorner));

  // Sticking out past either X bound breaks containment.
  final Box2D startsBeforeXMin = new Box2D(-1.0, 0.0, 5.0, 5.0);
  final Box2D endsAfterXMax = new Box2D(5.0, 0.0, 11.0, 5.0);
  assertFalse(Predicates.boxContains(container, startsBeforeXMin));
  assertFalse(Predicates.boxContains(container, endsAfterXMax));
}

/**
 * Box2D accepts xmin > xmax (reserved for future antimeridian wraparound), but the planar
 * predicates must refuse such input instead of silently returning misleading results.
 */
@Test
public void testBoxPredicatesRejectInvertedBounds() {
  final Box2D ordered = new Box2D(0.0, 0.0, 5.0, 5.0);
  // xmin > xmax: the shape a longitude range crossing the antimeridian would take.
  final Box2D invertedX = new Box2D(170.0, 10.0, -170.0, 20.0);
  // ymin > ymax.
  final Box2D invertedY = new Box2D(0.0, 5.0, 5.0, 0.0);

  // Inverted box supplied as the first argument of boxIntersects.
  final IllegalArgumentException fromIntersects =
      assertThrows(
          IllegalArgumentException.class, () -> Predicates.boxIntersects(invertedX, ordered));
  assertTrue(fromIntersects.getMessage().contains("inverted bounds"));

  // Inverted box supplied as the second argument of boxContains.
  final IllegalArgumentException fromContains =
      assertThrows(
          IllegalArgumentException.class, () -> Predicates.boxContains(ordered, invertedY));
  assertTrue(fromContains.getMessage().contains("inverted bounds"));
}

@Test
public void testDWithinSuccess() {
Geometry point1 = GEOMETRY_FACTORY.createPoint(new Coordinate(1, 1));
Expand Down
2 changes: 2 additions & 0 deletions flink/src/main/java/org/apache/sedona/flink/Catalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,8 @@ public static UserDefinedFunction[] getFuncs() {

public static UserDefinedFunction[] getPredicates() {
return new UserDefinedFunction[] {
new Predicates.ST_BoxContains(),
new Predicates.ST_BoxIntersects(),
new Predicates.ST_Intersects(),
new Predicates.ST_Contains(),
new Predicates.ST_Crosses(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,49 @@

import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.functions.ScalarFunction;
import org.apache.sedona.common.geometryObjects.Box2D;
import org.apache.sedona.flink.Box2DTypeSerializer;
import org.apache.sedona.flink.GeometryTypeSerializer;
import org.locationtech.jts.geom.Geometry;

public class Predicates {

/**
 * Flink scalar function exposing the common-module {@code boxIntersects} predicate over two
 * Box2D values. Returns SQL NULL when either argument is null.
 */
public static class ST_BoxIntersects extends ScalarFunction {
  @DataTypeHint("Boolean")
  public Boolean eval(
      @DataTypeHint(
              value = "RAW",
              rawSerializer = Box2DTypeSerializer.class,
              bridgedTo = Box2D.class)
          Box2D a,
      @DataTypeHint(
              value = "RAW",
              rawSerializer = Box2DTypeSerializer.class,
              bridgedTo = Box2D.class)
          Box2D b) {
    // Only evaluate the predicate when both operands are present; otherwise propagate NULL.
    if (a != null && b != null) {
      return org.apache.sedona.common.Predicates.boxIntersects(a, b);
    }
    return null;
  }
}

/**
 * Flink scalar function exposing the common-module {@code boxContains} predicate over two Box2D
 * values ({@code a} is the candidate container). Returns SQL NULL when either argument is null.
 */
public static class ST_BoxContains extends ScalarFunction {
  @DataTypeHint("Boolean")
  public Boolean eval(
      @DataTypeHint(
              value = "RAW",
              rawSerializer = Box2DTypeSerializer.class,
              bridgedTo = Box2D.class)
          Box2D a,
      @DataTypeHint(
              value = "RAW",
              rawSerializer = Box2DTypeSerializer.class,
              bridgedTo = Box2D.class)
          Box2D b) {
    // Only evaluate the predicate when both operands are present; otherwise propagate NULL.
    if (a != null && b != null) {
      return org.apache.sedona.common.Predicates.boxContains(a, b);
    }
    return null;
  }
}

public static class ST_Intersects extends ScalarFunction {
/** Constructor for relation checking without duplicate removal */
public ST_Intersects() {}
Expand Down
28 changes: 28 additions & 0 deletions flink/src/test/java/org/apache/sedona/flink/PredicateTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,34 @@ public static void onceExecutedBeforeAll() {
initialize();
}

/** Runs ST_BoxIntersects through Flink SQL for one overlapping and one disjoint pair of boxes. */
@Test
public void testBoxIntersects() {
  final String sql =
      "WITH boxes AS ("
          + " SELECT ST_Box2D(ST_GeomFromWKT('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))')) AS a,"
          + " ST_Box2D(ST_GeomFromWKT('POLYGON((3 3, 3 7, 7 7, 7 3, 3 3))')) AS overlap,"
          + " ST_Box2D(ST_GeomFromWKT('POLYGON((6 6, 6 7, 7 7, 7 6, 6 6))')) AS disjoint)"
          + " SELECT ST_BoxIntersects(a, overlap), ST_BoxIntersects(a, disjoint) FROM boxes";
  final Table queried = tableEnv.sqlQuery(sql);
  final org.apache.flink.types.Row result = first(queried);
  // Field 0: a vs. overlap; field 1: a vs. disjoint.
  assertEquals(Boolean.TRUE, result.getField(0));
  assertEquals(Boolean.FALSE, result.getField(1));
}

/** Runs ST_BoxContains through Flink SQL for a nested box and a box that extends past. */
@Test
public void testBoxContains() {
  final String sql =
      "WITH boxes AS ("
          + " SELECT ST_Box2D(ST_GeomFromWKT('POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))')) AS outer_box,"
          + " ST_Box2D(ST_GeomFromWKT('POLYGON((2 2, 2 5, 5 5, 5 2, 2 2))')) AS inner_box,"
          + " ST_Box2D(ST_GeomFromWKT('POLYGON((5 5, 5 11, 11 11, 11 5, 5 5))')) AS overlap)"
          + " SELECT ST_BoxContains(outer_box, inner_box), ST_BoxContains(outer_box, overlap) FROM boxes";
  final Table queried = tableEnv.sqlQuery(sql);
  final org.apache.flink.types.Row result = first(queried);
  // Field 0: outer_box vs. inner_box; field 1: outer_box vs. overlap.
  assertEquals(Boolean.TRUE, result.getField(0));
  assertEquals(Boolean.FALSE, result.getField(1));
}

@Test
public void testIntersects() {
Table pointTable = createPointTable(testDataSize);
Expand Down
32 changes: 32 additions & 0 deletions python/sedona/spark/sql/st_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,38 @@
_call_predicate_function = partial(call_sedona_function, "st_predicates")


@validate_argument_types
def ST_BoxContains(a: ColumnOrName, b: ColumnOrName) -> Column:
    """Test whether Box2D ``a`` fully contains Box2D ``b``.

    Intervals are closed, so equal boxes contain each other. Equivalent to
    the PostGIS ``~`` operator on box2d. The result is NULL when either
    input is NULL.

    :param a: Box2D column acting as the container.
    :type a: ColumnOrName
    :param b: Box2D column tested for being inside ``a``.
    :type b: ColumnOrName
    :return: Boolean column: true where ``a`` contains ``b``, false otherwise.
    :rtype: Column
    """
    box_args = (a, b)
    return _call_predicate_function("ST_BoxContains", box_args)


@validate_argument_types
def ST_BoxIntersects(a: ColumnOrName, b: ColumnOrName) -> Column:
    """Test whether Box2D ``a`` and Box2D ``b`` have at least one point in common.

    Intervals are closed, so boxes that merely touch still intersect.
    Equivalent to the PostGIS ``&&`` operator on box2d. The result is NULL
    when either input is NULL.

    :param a: First Box2D column.
    :type a: ColumnOrName
    :param b: Second Box2D column.
    :type b: ColumnOrName
    :return: Boolean column: true where the boxes overlap, false otherwise.
    :rtype: Column
    """
    box_args = (a, b)
    return _call_predicate_function("ST_BoxIntersects", box_args)


@validate_argument_types
def ST_Contains(a: ColumnOrName, b: ColumnOrName) -> Column:
"""Check whether geometry a contains geometry b.
Expand Down
24 changes: 24 additions & 0 deletions python/tests/sql/test_predicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,30 @@

class TestPredicate(TestBase):

# Runs ST_BoxIntersects and ST_BoxContains through Spark SQL against a reference
# box `a` spanning 0..10 on both axes, built with ST_Box2D from polygon WKT.
def test_st_box_intersects_and_contains(self):
df = self.spark.sql("""
WITH t AS (
SELECT
ST_Box2D(ST_GeomFromText('POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))')) AS a,
ST_Box2D(ST_GeomFromText('POLYGON((2 2, 2 5, 5 5, 5 2, 2 2))')) AS inside,
ST_Box2D(ST_GeomFromText('POLYGON((5 5, 5 11, 11 11, 11 5, 5 5))')) AS overlap,
ST_Box2D(ST_GeomFromText('POLYGON((11 11, 11 12, 12 12, 12 11, 11 11))')) AS disjoint
)
SELECT
ST_BoxIntersects(a, inside) AS i_inside,
ST_BoxIntersects(a, overlap) AS i_overlap,
ST_BoxIntersects(a, disjoint) AS i_disjoint,
ST_BoxContains(a, inside) AS c_inside,
ST_BoxContains(a, overlap) AS c_overlap
FROM t
""")
# The query yields a single row; positions follow the SELECT order above.
row = df.first()
# `is True` / `is False` also pin the value type to a real bool, not merely truthiness.
assert row[0] is True  # i_inside: nested box intersects
assert row[1] is True  # i_overlap: partially overlapping box intersects
assert row[2] is False  # i_disjoint: box entirely beyond a's max bounds
assert row[3] is True  # c_inside: nested box is contained
assert row[4] is False  # c_overlap: box extends past a's max bounds

def test_st_contains(self):
point_csv_df = (
self.spark.read.format("csv")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ object Catalog extends AbstractCatalog with Logging {

// Predicates
val predicateExprs: Seq[FunctionDescription] = Seq(
function[ST_BoxContains](),
function[ST_BoxIntersects](),
function[ST_Contains](),
function[ST_CoveredBy](),
function[ST_Covers](),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,38 @@ private[apache] case class ST_Contains(inputExpressions: Seq[Expression])
}
}

/**
 * Spark SQL expression for the closed-interval bounding-box intersection test on two Box2D
 * arguments, backed by `Predicates.boxIntersects`. Evaluates to true when the boxes overlap on
 * both the X and Y axes (the behaviour of PostGIS `&&` on box2d); boxes that only touch along an
 * edge or at a corner still intersect. Inverted bounds (xmin>xmax / ymin>ymax) raise an error
 * because planar predicates have no defined meaning for inverted intervals; that ordering is
 * reserved for future antimeridian-wraparound semantics.
 *
 * @param inputExpressions
 *   the child expressions supplying the two Box2D operands
 */
private[apache] case class ST_BoxIntersects(inputExpressions: Seq[Expression])
    extends InferredExpression(Predicates.boxIntersects _) {

  protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) =
    copy(inputExpressions = newChildren)
}

/**
 * Spark SQL expression for the closed-interval bounding-box containment test on two Box2D
 * arguments, backed by `Predicates.boxContains`. Evaluates to true when argument `a` fully
 * contains argument `b` on both axes (the behaviour of PostGIS `~` on box2d); equal boxes contain
 * each other. Inverted bounds raise an error for the same reason as in ST_BoxIntersects.
 *
 * @param inputExpressions
 *   the child expressions supplying the two Box2D operands (`a`, then `b`)
 */
private[apache] case class ST_BoxContains(inputExpressions: Seq[Expression])
    extends InferredExpression(Predicates.boxContains _) {

  protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) =
    copy(inputExpressions = newChildren)
}

/**
* Test if leftGeometry full intersects rightGeometry. Supports both Geometry (JTS) and Geography
* (S2) inputs via InferredExpression dual dispatch.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ import org.apache.spark.sql.Column
import org.apache.spark.sql.sedona_sql.DataFrameShims._

object st_predicates {
// DataFrame-API entry points for the ST_BoxContains expression (Box2D `a` contains Box2D `b`).
// Two overloads: Column arguments, and String arguments (presumably column names resolved by
// wrapExpression — confirm against DataFrameShims).
def ST_BoxContains(a: Column, b: Column): Column = wrapExpression[ST_BoxContains](a, b)
def ST_BoxContains(a: String, b: String): Column = wrapExpression[ST_BoxContains](a, b)

// DataFrame-API entry points for the ST_BoxIntersects expression (Box2D `a` intersects Box2D
// `b`). Two overloads: Column arguments, and String arguments (presumably column names resolved
// by wrapExpression — confirm against DataFrameShims).
def ST_BoxIntersects(a: Column, b: Column): Column = wrapExpression[ST_BoxIntersects](a, b)
def ST_BoxIntersects(a: String, b: String): Column = wrapExpression[ST_BoxIntersects](a, b)

// DataFrame-API entry points for the ST_Contains expression. Two overloads: Column arguments,
// and String arguments (presumably column names resolved by wrapExpression — confirm against
// DataFrameShims).
def ST_Contains(a: Column, b: Column): Column = wrapExpression[ST_Contains](a, b)
def ST_Contains(a: String, b: String): Column = wrapExpression[ST_Contains](a, b)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,41 @@ class predicateTestScala extends TestBaseScala {

describe("Sedona-SQL Predicate Test") {

// End-to-end SQL check of ST_BoxIntersects / ST_BoxContains against a reference box `a`
// spanning 0..5 on both axes. Covers nested, partially overlapping, edge-touching and disjoint
// boxes, plus a NULL box obtained by applying ST_Box2D to a NULL geometry.
it("Passed ST_BoxIntersects and ST_BoxContains") {
val df = sparkSession.sql("""
WITH t AS (
SELECT
ST_Box2D(ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))')) AS a,
ST_Box2D(ST_GeomFromText('POLYGON((1 1, 1 2, 2 2, 2 1, 1 1))')) AS inside,
ST_Box2D(ST_GeomFromText('POLYGON((3 3, 3 7, 7 7, 7 3, 3 3))')) AS overlap,
ST_Box2D(ST_GeomFromText('POLYGON((5 0, 5 5, 10 5, 10 0, 5 0))')) AS edge,
ST_Box2D(ST_GeomFromText('POLYGON((6 6, 6 7, 7 7, 7 6, 6 6))')) AS disjoint,
ST_Box2D(ST_GeomFromText(NULL)) AS box_null
)
SELECT
ST_BoxIntersects(a, inside),
ST_BoxIntersects(a, overlap),
ST_BoxIntersects(a, edge),
ST_BoxIntersects(a, disjoint),
ST_BoxIntersects(a, box_null),
ST_BoxContains(a, inside),
ST_BoxContains(a, overlap),
ST_BoxContains(a, a),
ST_BoxContains(a, box_null)
FROM t
""")
// The CTE produces exactly one row; column positions follow the SELECT order above.
val row = df.collect()(0)
assert(row.getBoolean(0)) // intersects: inside
assert(row.getBoolean(1)) // intersects: overlap
assert(row.getBoolean(2)) // intersects: edge-touch
assert(!row.getBoolean(3)) // intersects: disjoint
assert(row.isNullAt(4)) // intersects: NULL propagates
assert(row.getBoolean(5)) // contains: inside
assert(!row.getBoolean(6)) // contains: overlap (extends past)
assert(row.getBoolean(7)) // contains: equal
assert(row.isNullAt(8)) // contains: NULL propagates
}

it("Passed ST_Contains") {
var pointCsvDF = sparkSession.read
.format("csv")
Expand Down
Loading