Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 29 additions & 4 deletions src/backend/gporca/libgpopt/src/base/CUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -922,10 +922,30 @@ struct SCTEInfo

typedef CDynamicPtrArray<SCTEInfo, CleanupDelete<SCTEInfo> > CTEInfoArray;

// True if the operator is a correlated NL join. Its inner side becomes
// an executor SubPlan that runs in its own slice, so a CTE Consumer
// there is cross-slice w.r.t. a Producer outside -- which can deadlock
// the ShareInputScan writer. We treat the inner side as a slice
// boundary so the check below catches it.
//
// These are all of ORCA's SubPlan-producing operators. Add new ones here.
static BOOL
FCorrelatedNLJoin(COperator *pop)
{
COperator::EOperatorId eopid = pop->Eopid();
return (COperator::EopPhysicalCorrelatedInnerNLJoin == eopid ||
COperator::EopPhysicalCorrelatedLeftOuterNLJoin == eopid ||
COperator::EopPhysicalCorrelatedLeftSemiNLJoin == eopid ||
COperator::EopPhysicalCorrelatedInLeftSemiNLJoin == eopid ||
COperator::EopPhysicalCorrelatedLeftAntiSemiNLJoin == eopid ||
COperator::EopPhysicalCorrelatedNotInLeftAntiSemiNLJoin == eopid);
}
Comment on lines +925 to +942

// Walk the physical tree, recording the slice id of every replicated
// CTE Producer and every CTE Consumer. Slices are delimited by Motion
// nodes: each non-scalar child of a Motion lives in a fresh slice --
// same motId-stack idea as in apply_shareinput_xslice.
// nodes and by the SubPlan (inner) side of correlated NL joins: each
// such non-scalar child lives in a fresh slice -- same motId-stack idea
// as in apply_shareinput_xslice.
static void
CollectCTESlices(CMemoryPool *mp, CExpression *pexpr, ULONG curSlice,
ULONG *pNextSlice, CTEInfoArray *prodInfos,
Expand Down Expand Up @@ -958,6 +978,7 @@ CollectCTESlices(CMemoryPool *mp, CExpression *pexpr, ULONG curSlice,
}

BOOL isMotion = CUtils::FPhysicalMotion(pop);
BOOL isCorrelatedNLJoin = FCorrelatedNLJoin(pop);

for (ULONG ul = 0; ul < pexpr->Arity(); ul++)
{
Expand All @@ -971,9 +992,13 @@ CollectCTESlices(CMemoryPool *mp, CExpression *pexpr, ULONG curSlice,
}

// Allocate a fresh slice id for each non-scalar child of a
// Motion; otherwise the child stays in the parent's slice.
// Motion, and for the inner (subquery) side of a correlated NL
// join -- which the executor materializes as a SubPlan running
// in its own slice. Otherwise the child stays in the parent's
// slice. (For a NL join child 0 is the outer relation and child
// 1 is the inner/subquery relation.)
ULONG childSlice = curSlice;
if (isMotion)
if (isMotion || (isCorrelatedNLJoin && 1 == ul))
{
(*pNextSlice)++;
childSlice = *pNextSlice;
Expand Down
32 changes: 32 additions & 0 deletions src/test/regress/expected/shared_scan.out
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,38 @@ WITH

RESET statement_timeout;
DROP TABLE ss_t1, ss_t2;
-- ORCA should also fall back when the replicated CTE is referenced from
-- *correlated* scalar subqueries. These become correlated NL joins whose
-- inner side runs as a SubPlan in its own slice. Counting only Motion
-- nodes misses this, so the ShareInputScan writer hangs waiting for a
-- DONE ack from the reader slice. The walk must treat the correlated-join
-- inner side as a separate slice.
CREATE TABLE ss_c2 (id numeric, refrcode varchar(255), referenceid numeric)
DISTRIBUTED REPLICATED;
CREATE TABLE ss_c1 (id bigint, iscalctrg varchar(15) NOT NULL,
iscalcdetail varchar(15))
DISTRIBUTED REPLICATED;
INSERT INTO ss_c2 SELECT i, 'A'||(i%5), 101991 FROM generate_series(1, 50000) i;
INSERT INTO ss_c1
SELECT i, 'A'||(i%5), 'A'||(i%7) FROM generate_series(1, 50000) i;
ANALYZE ss_c1;
ANALYZE ss_c2;
SET statement_timeout = '15s';
WITH cte AS (
SELECT id, refrcode FROM ss_c2 WHERE referenceid = 101991 AND id < 25000
UNION ALL
SELECT id, refrcode FROM ss_c2 WHERE referenceid = 101991 AND id >= 25000
)
SELECT (SELECT refrcode FROM cte WHERE refrcode = p.iscalctrg LIMIT 1) = 'A1'
AND (SELECT refrcode FROM cte WHERE refrcode = p.iscalcdetail LIMIT 1) = 'A1' AS ok
FROM ss_c1 p WHERE p.id = 1;
ok
----
t
(1 row)

RESET statement_timeout;
DROP TABLE ss_c1, ss_c2;
-- Test the scenario which already opened many fds
-- start_ignore
RESET search_path;
Expand Down
32 changes: 32 additions & 0 deletions src/test/regress/expected/shared_scan_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,38 @@ WITH

RESET statement_timeout;
DROP TABLE ss_t1, ss_t2;
-- ORCA should also fall back when the replicated CTE is referenced from
-- *correlated* scalar subqueries. These become correlated NL joins whose
-- inner side runs as a SubPlan in its own slice. Counting only Motion
-- nodes misses this, so the ShareInputScan writer hangs waiting for a
-- DONE ack from the reader slice. The walk must treat the correlated-join
-- inner side as a separate slice.
CREATE TABLE ss_c2 (id numeric, refrcode varchar(255), referenceid numeric)
DISTRIBUTED REPLICATED;
CREATE TABLE ss_c1 (id bigint, iscalctrg varchar(15) NOT NULL,
iscalcdetail varchar(15))
DISTRIBUTED REPLICATED;
INSERT INTO ss_c2 SELECT i, 'A'||(i%5), 101991 FROM generate_series(1, 50000) i;
INSERT INTO ss_c1
SELECT i, 'A'||(i%5), 'A'||(i%7) FROM generate_series(1, 50000) i;
ANALYZE ss_c1;
ANALYZE ss_c2;
SET statement_timeout = '15s';
WITH cte AS (
SELECT id, refrcode FROM ss_c2 WHERE referenceid = 101991 AND id < 25000
UNION ALL
SELECT id, refrcode FROM ss_c2 WHERE referenceid = 101991 AND id >= 25000
)
SELECT (SELECT refrcode FROM cte WHERE refrcode = p.iscalctrg LIMIT 1) = 'A1'
AND (SELECT refrcode FROM cte WHERE refrcode = p.iscalcdetail LIMIT 1) = 'A1' AS ok
FROM ss_c1 p WHERE p.id = 1;
ok
----
t
(1 row)

RESET statement_timeout;
DROP TABLE ss_c1, ss_c2;
-- Test the scenario which already opened many fds
-- start_ignore
RESET search_path;
Expand Down
29 changes: 29 additions & 0 deletions src/test/regress/sql/shared_scan.sql
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,35 @@ WITH
RESET statement_timeout;
DROP TABLE ss_t1, ss_t2;

-- ORCA should also fall back when the replicated CTE is referenced from
-- *correlated* scalar subqueries. These become correlated NL joins whose
-- inner side runs as a SubPlan in its own slice. Counting only Motion
-- nodes misses this, so the ShareInputScan writer hangs waiting for a
-- DONE ack from the reader slice. The walk must treat the correlated-join
-- inner side as a separate slice.
CREATE TABLE ss_c2 (id numeric, refrcode varchar(255), referenceid numeric)
DISTRIBUTED REPLICATED;
CREATE TABLE ss_c1 (id bigint, iscalctrg varchar(15) NOT NULL,
iscalcdetail varchar(15))
DISTRIBUTED REPLICATED;
INSERT INTO ss_c2 SELECT i, 'A'||(i%5), 101991 FROM generate_series(1, 50000) i;
INSERT INTO ss_c1
SELECT i, 'A'||(i%5), 'A'||(i%7) FROM generate_series(1, 50000) i;
ANALYZE ss_c1;
ANALYZE ss_c2;

SET statement_timeout = '15s';
WITH cte AS (
SELECT id, refrcode FROM ss_c2 WHERE referenceid = 101991 AND id < 25000
UNION ALL
SELECT id, refrcode FROM ss_c2 WHERE referenceid = 101991 AND id >= 25000
)
SELECT (SELECT refrcode FROM cte WHERE refrcode = p.iscalctrg LIMIT 1) = 'A1'
AND (SELECT refrcode FROM cte WHERE refrcode = p.iscalcdetail LIMIT 1) = 'A1' AS ok
FROM ss_c1 p WHERE p.id = 1;
RESET statement_timeout;
DROP TABLE ss_c1, ss_c2;

-- Test the scenario which already opened many fds
-- start_ignore
RESET search_path;
Expand Down
Loading