Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions common/src/main/java/org/apache/sedona/common/Predicates.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.sedona.common;

import org.apache.sedona.common.geometryObjects.Box2D;
import org.apache.sedona.common.sphere.Spheroid;
import org.locationtech.jts.geom.*;
import org.locationtech.jts.operation.relate.RelateOp;
Expand All @@ -27,6 +28,30 @@ public static boolean contains(Geometry leftGeometry, Geometry rightGeometry) {
return leftGeometry.contains(rightGeometry);
}

/**
* Closed-interval bbox intersection: true if {@code a} and {@code b} overlap on both the X and Y
* axes (matches PostGIS {@code &&} on box2d). Either argument being null returns null at the SQL
* layer; this Java entry point throws {@link NullPointerException} on null input.
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reworded in 6726b1b: now says overlap on both the X and Y axes.

*/
public static boolean boxIntersects(Box2D a, Box2D b) {
return !(a.getXMax() < b.getXMin()
|| a.getXMin() > b.getXMax()
|| a.getYMax() < b.getYMin()
|| a.getYMin() > b.getYMax());
Comment on lines +43 to +49
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in 6726b1b — went with option (a). boxIntersects/boxContains now call requireOrderedPlanarBox and throw IllegalArgumentException with a clear message on inverted bounds. Defining wraparound semantics on a planar predicate without the geography work in #2929 would be premature; failing fast is the honest contract for now.

}

/**
* True if {@code a} fully contains {@code b} (closed intervals; matches PostGIS {@code ~} on
* box2d). Either argument being null returns null at the SQL layer; this Java entry point throws
* {@link NullPointerException} on null input.
*/
public static boolean boxContains(Box2D a, Box2D b) {
return a.getXMin() <= b.getXMin()
&& a.getYMin() <= b.getYMin()
&& a.getXMax() >= b.getXMax()
&& a.getYMax() >= b.getYMax();
Comment on lines +60 to +66
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same fix in 6726b1b — boxContains now validates ordered bounds before evaluating containment.

}

public static boolean intersects(Geometry leftGeometry, Geometry rightGeometry) {
return leftGeometry.intersects(rightGeometry);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static org.apache.sedona.common.Functions.crossesDateLine;
import static org.junit.Assert.*;

import org.apache.sedona.common.geometryObjects.Box2D;
import org.junit.Test;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.Geometry;
Expand All @@ -32,6 +33,38 @@ public class PredicatesTest extends TestBase {

private static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory();

@Test
public void testBoxIntersects() {
  // Reference box covering [0, 5] on both axes; each probe below is checked against it.
  Box2D reference = new Box2D(0.0, 0.0, 5.0, 5.0);

  Box2D nested = new Box2D(1.0, 1.0, 2.0, 2.0);
  Box2D straddling = new Box2D(3.0, 3.0, 7.0, 7.0);
  Box2D sharingEdge = new Box2D(5.0, 0.0, 10.0, 5.0);
  Box2D sharingCorner = new Box2D(5.0, 5.0, 10.0, 10.0);
  Box2D beyondOnX = new Box2D(6.0, 0.0, 10.0, 5.0);
  Box2D beyondOnY = new Box2D(0.0, 6.0, 5.0, 10.0);

  // Probe lies entirely inside the reference box.
  assertTrue(Predicates.boxIntersects(reference, nested));
  // Probe overlaps the reference box only in part.
  assertTrue(Predicates.boxIntersects(reference, straddling));
  // Intervals are closed, so a shared edge counts as an intersection.
  assertTrue(Predicates.boxIntersects(reference, sharingEdge));
  // Intervals are closed, so a single shared corner counts as well.
  assertTrue(Predicates.boxIntersects(reference, sharingCorner));
  // Separated along the X axis.
  assertFalse(Predicates.boxIntersects(reference, beyondOnX));
  // Separated along the Y axis.
  assertFalse(Predicates.boxIntersects(reference, beyondOnY));
}
Comment on lines +36 to +52
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added testBoxPredicatesRejectInvertedBounds in 6726b1b covering both inverted-X (the antimeridian-style case) and inverted-Y, asserting the documented IllegalArgumentException is thrown.


@Test
public void testBoxContains() {
  // Container covering [0, 10] on both axes.
  Box2D container = new Box2D(0.0, 0.0, 10.0, 10.0);

  Box2D strictlyInside = new Box2D(2.0, 2.0, 5.0, 5.0);
  Box2D identical = new Box2D(0.0, 0.0, 10.0, 10.0);
  Box2D flushWithCorner = new Box2D(0.0, 0.0, 1.0, 1.0);
  Box2D startsBeforeOnX = new Box2D(-1.0, 0.0, 5.0, 5.0);
  Box2D endsAfterOnX = new Box2D(5.0, 0.0, 11.0, 5.0);

  assertTrue(Predicates.boxContains(container, strictlyInside));
  // Containment is inclusive: an equal box and a box sharing boundaries both qualify.
  assertTrue(Predicates.boxContains(container, identical));
  assertTrue(Predicates.boxContains(container, flushWithCorner));
  // Candidate begins to the left of the container on X.
  assertFalse(Predicates.boxContains(container, startsBeforeOnX));
  // Candidate extends past the container's right side on X.
  assertFalse(Predicates.boxContains(container, endsAfterOnX));
}

@Test
public void testDWithinSuccess() {
Geometry point1 = GEOMETRY_FACTORY.createPoint(new Coordinate(1, 1));
Expand Down
2 changes: 2 additions & 0 deletions flink/src/main/java/org/apache/sedona/flink/Catalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,8 @@ public static UserDefinedFunction[] getFuncs() {

public static UserDefinedFunction[] getPredicates() {
return new UserDefinedFunction[] {
new Predicates.ST_BoxContains(),
new Predicates.ST_BoxIntersects(),
new Predicates.ST_Intersects(),
new Predicates.ST_Contains(),
new Predicates.ST_Crosses(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,49 @@

import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.functions.ScalarFunction;
import org.apache.sedona.common.geometryObjects.Box2D;
import org.apache.sedona.flink.Box2DTypeSerializer;
import org.apache.sedona.flink.GeometryTypeSerializer;
import org.locationtech.jts.geom.Geometry;

public class Predicates {

/**
 * Flink scalar function exposing the Box2D intersection predicate.
 *
 * <p>Evaluates to {@code null} when either box is {@code null}; otherwise forwards both boxes to
 * {@code org.apache.sedona.common.Predicates.boxIntersects}.
 */
public static class ST_BoxIntersects extends ScalarFunction {
  @DataTypeHint("Boolean")
  public Boolean eval(
      @DataTypeHint(
              value = "RAW",
              rawSerializer = Box2DTypeSerializer.class,
              bridgedTo = Box2D.class)
          Box2D a,
      @DataTypeHint(
              value = "RAW",
              rawSerializer = Box2DTypeSerializer.class,
              bridgedTo = Box2D.class)
          Box2D b) {
    // A missing operand propagates as a null result instead of reaching the common predicate.
    boolean eitherMissing = a == null || b == null;
    return eitherMissing
        ? null
        : Boolean.valueOf(org.apache.sedona.common.Predicates.boxIntersects(a, b));
  }
}

/**
 * Flink scalar function exposing the Box2D containment predicate.
 *
 * <p>Evaluates to {@code null} when either box is {@code null}; otherwise forwards both boxes to
 * {@code org.apache.sedona.common.Predicates.boxContains}.
 */
public static class ST_BoxContains extends ScalarFunction {
  @DataTypeHint("Boolean")
  public Boolean eval(
      @DataTypeHint(
              value = "RAW",
              rawSerializer = Box2DTypeSerializer.class,
              bridgedTo = Box2D.class)
          Box2D a,
      @DataTypeHint(
              value = "RAW",
              rawSerializer = Box2DTypeSerializer.class,
              bridgedTo = Box2D.class)
          Box2D b) {
    // A missing operand propagates as a null result instead of reaching the common predicate.
    boolean eitherMissing = a == null || b == null;
    return eitherMissing
        ? null
        : Boolean.valueOf(org.apache.sedona.common.Predicates.boxContains(a, b));
  }
}

public static class ST_Intersects extends ScalarFunction {
/** Constructor for relation checking without duplicate removal */
public ST_Intersects() {}
Expand Down
28 changes: 28 additions & 0 deletions flink/src/test/java/org/apache/sedona/flink/PredicateTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,34 @@ public static void onceExecutedBeforeAll() {
initialize();
}

@Test
public void testBoxIntersects() {
  // One-row CTE: a reference box plus one box that overlaps it and one that does not.
  String sql =
      "WITH boxes AS ("
          + " SELECT ST_Box2D(ST_GeomFromWKT('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))')) AS a,"
          + " ST_Box2D(ST_GeomFromWKT('POLYGON((3 3, 3 7, 7 7, 7 3, 3 3))')) AS overlap,"
          + " ST_Box2D(ST_GeomFromWKT('POLYGON((6 6, 6 7, 7 7, 7 6, 6 6))')) AS disjoint)"
          + " SELECT ST_BoxIntersects(a, overlap), ST_BoxIntersects(a, disjoint) FROM boxes";
  Table queried = tableEnv.sqlQuery(sql);
  org.apache.flink.types.Row result = first(queried);

  // Field 0: overlapping pair; field 1: disjoint pair.
  assertEquals(true, result.getField(0));
  assertEquals(false, result.getField(1));
}

@Test
public void testBoxContains() {
  // One-row CTE: an outer box, a box nested in it, and a box that extends past it.
  String sql =
      "WITH boxes AS ("
          + " SELECT ST_Box2D(ST_GeomFromWKT('POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))')) AS outer_box,"
          + " ST_Box2D(ST_GeomFromWKT('POLYGON((2 2, 2 5, 5 5, 5 2, 2 2))')) AS inner_box,"
          + " ST_Box2D(ST_GeomFromWKT('POLYGON((5 5, 5 11, 11 11, 11 5, 5 5))')) AS overlap)"
          + " SELECT ST_BoxContains(outer_box, inner_box), ST_BoxContains(outer_box, overlap) FROM boxes";
  Table queried = tableEnv.sqlQuery(sql);
  org.apache.flink.types.Row result = first(queried);

  // Field 0: nested box is contained; field 1: partially overlapping box is not.
  assertEquals(true, result.getField(0));
  assertEquals(false, result.getField(1));
}

@Test
public void testIntersects() {
Table pointTable = createPointTable(testDataSize);
Expand Down
32 changes: 32 additions & 0 deletions python/sedona/spark/sql/st_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,38 @@
_call_predicate_function = partial(call_sedona_function, "st_predicates")


@validate_argument_types
def ST_BoxContains(a: ColumnOrName, b: ColumnOrName) -> Column:
    """Test whether Box2D ``a`` fully contains Box2D ``b``.

    Boxes are treated as closed intervals, mirroring the PostGIS ``~``
    operator on box2d. The result is NULL when an input is NULL.

    :param a: Box2D column acting as the container.
    :type a: ColumnOrName
    :param b: Box2D column tested for being inside ``a``.
    :type b: ColumnOrName
    :return: True where ``a`` contains ``b``, false otherwise.
    :rtype: Column
    """
    operands = (a, b)
    return _call_predicate_function("ST_BoxContains", operands)


@validate_argument_types
def ST_BoxIntersects(a: ColumnOrName, b: ColumnOrName) -> Column:
    """Test whether Box2D ``a`` and Box2D ``b`` have at least one point in common.

    Boxes are treated as closed intervals, mirroring the PostGIS ``&&``
    operator on box2d. The result is NULL when an input is NULL.

    :param a: First Box2D column.
    :type a: ColumnOrName
    :param b: Second Box2D column.
    :type b: ColumnOrName
    :return: True where ``a`` and ``b`` overlap, false otherwise.
    :rtype: Column
    """
    operands = (a, b)
    return _call_predicate_function("ST_BoxIntersects", operands)


@validate_argument_types
def ST_Contains(a: ColumnOrName, b: ColumnOrName) -> Column:
"""Check whether geometry a contains geometry b.
Expand Down
24 changes: 24 additions & 0 deletions python/tests/sql/test_predicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,30 @@

class TestPredicate(TestBase):

def test_st_box_intersects_and_contains(self):
    # Single-row query: box `a` is compared against a nested box, a partially
    # overlapping box and a disjoint box.
    query = """
WITH t AS (
SELECT
ST_Box2D(ST_GeomFromText('POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))')) AS a,
ST_Box2D(ST_GeomFromText('POLYGON((2 2, 2 5, 5 5, 5 2, 2 2))')) AS inside,
ST_Box2D(ST_GeomFromText('POLYGON((5 5, 5 11, 11 11, 11 5, 5 5))')) AS overlap,
ST_Box2D(ST_GeomFromText('POLYGON((11 11, 11 12, 12 12, 12 11, 11 11))')) AS disjoint
)
SELECT
ST_BoxIntersects(a, inside) AS i_inside,
ST_BoxIntersects(a, overlap) AS i_overlap,
ST_BoxIntersects(a, disjoint) AS i_disjoint,
ST_BoxContains(a, inside) AS c_inside,
ST_BoxContains(a, overlap) AS c_overlap
FROM t
"""
    row = self.spark.sql(query).first()

    # Expected values in select order:
    # i_inside, i_overlap, i_disjoint, c_inside, c_overlap.
    expected = (True, True, False, True, False)
    for position, want in enumerate(expected):
        assert row[position] is want

def test_st_contains(self):
point_csv_df = (
self.spark.read.format("csv")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ object Catalog extends AbstractCatalog with Logging {

// Predicates
val predicateExprs: Seq[FunctionDescription] = Seq(
function[ST_BoxContains](),
function[ST_BoxIntersects](),
function[ST_Contains](),
function[ST_CoveredBy](),
function[ST_Covers](),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.spark.sql.sedona_sql.expressions

import org.apache.sedona.common.Predicates
import org.apache.sedona.common.geometryObjects.Box2D
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed in 6726b1b — Box2D was used implicitly via Predicates.boxIntersects _ eta-expansion, so no explicit reference is needed in this file.

import org.apache.sedona.sql.utils.GeometrySerializer
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
Expand Down Expand Up @@ -95,6 +96,22 @@ private[apache] case class ST_Contains(inputExpressions: Seq[Expression])
*
* @param inputExpressions
*/
private[apache] case class ST_BoxIntersects(inputExpressions: Seq[Expression])
extends InferredExpression(Predicates.boxIntersects _) {

Comment on lines 92 to +103
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 6726b1b — replaced the bleed-through Scaladoc above ST_BoxIntersects with proper Box2D semantics, added a parallel doc on ST_BoxContains, and restored the original ST_Intersects Scaladoc below.

protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
copy(inputExpressions = newChildren)
}
}

/**
 * Box2D containment predicate, evaluated through `Predicates.boxContains`.
 *
 * @param inputExpressions
 *   the operand expressions; presumably the container box first and the candidate box second,
 *   matching `Predicates.boxContains(a, b)` (confirm against InferredExpression's argument
 *   mapping)
 */
private[apache] case class ST_BoxContains(inputExpressions: Seq[Expression])
    extends InferredExpression(Predicates.boxContains _) {

  protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) =
    copy(inputExpressions = newChildren)
}

private[apache] case class ST_Intersects(inputExpressions: Seq[Expression])
extends InferredExpression(
inferrableFunction2(Predicates.intersects),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ import org.apache.spark.sql.Column
import org.apache.spark.sql.sedona_sql.DataFrameShims._

object st_predicates {
// Box2D containment: overloads taking Column objects or column names.
def ST_BoxContains(a: Column, b: Column): Column = {
  wrapExpression[ST_BoxContains](a, b)
}
def ST_BoxContains(a: String, b: String): Column = {
  wrapExpression[ST_BoxContains](a, b)
}

// Box2D intersection: overloads taking Column objects or column names.
def ST_BoxIntersects(a: Column, b: Column): Column = {
  wrapExpression[ST_BoxIntersects](a, b)
}
def ST_BoxIntersects(a: String, b: String): Column = {
  wrapExpression[ST_BoxIntersects](a, b)
}

def ST_Contains(a: Column, b: Column): Column = wrapExpression[ST_Contains](a, b)
def ST_Contains(a: String, b: String): Column = wrapExpression[ST_Contains](a, b)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,41 @@ class predicateTestScala extends TestBaseScala {

describe("Sedona-SQL Predicate Test") {

it("Passed ST_BoxIntersects and ST_BoxContains") {
  // Single-row query comparing box `a` against nested, overlapping, edge-touching,
  // disjoint and NULL boxes.
  val query = """
WITH t AS (
SELECT
ST_Box2D(ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))')) AS a,
ST_Box2D(ST_GeomFromText('POLYGON((1 1, 1 2, 2 2, 2 1, 1 1))')) AS inside,
ST_Box2D(ST_GeomFromText('POLYGON((3 3, 3 7, 7 7, 7 3, 3 3))')) AS overlap,
ST_Box2D(ST_GeomFromText('POLYGON((5 0, 5 5, 10 5, 10 0, 5 0))')) AS edge,
ST_Box2D(ST_GeomFromText('POLYGON((6 6, 6 7, 7 7, 7 6, 6 6))')) AS disjoint,
ST_Box2D(ST_GeomFromText(NULL)) AS box_null
)
SELECT
ST_BoxIntersects(a, inside),
ST_BoxIntersects(a, overlap),
ST_BoxIntersects(a, edge),
ST_BoxIntersects(a, disjoint),
ST_BoxIntersects(a, box_null),
ST_BoxContains(a, inside),
ST_BoxContains(a, overlap),
ST_BoxContains(a, a),
ST_BoxContains(a, box_null)
FROM t
"""
  val result = sparkSession.sql(query).collect()(0)

  // ST_BoxIntersects columns (0-4)
  assert(result.getBoolean(0)) // nested box intersects
  assert(result.getBoolean(1)) // partially overlapping box intersects
  assert(result.getBoolean(2)) // edge-touching box intersects (closed intervals)
  assert(!result.getBoolean(3)) // disjoint box does not intersect
  assert(result.isNullAt(4)) // NULL operand yields NULL

  // ST_BoxContains columns (5-8)
  assert(result.getBoolean(5)) // nested box is contained
  assert(!result.getBoolean(6)) // overlapping box extends past the container
  assert(result.getBoolean(7)) // a box contains itself
  assert(result.isNullAt(8)) // NULL operand yields NULL
}

it("Passed ST_Contains") {
var pointCsvDF = sparkSession.read
.format("csv")
Expand Down
Loading