From 50148fe146b7befbccf58aca315da30d111c5d19 Mon Sep 17 00:00:00 2001 From: nolouch Date: Tue, 19 May 2026 17:34:04 -0700 Subject: [PATCH 1/6] util/stmtsummary: add tidb_stmt_summary_group_by_user Introduce a new global system variable tidb_stmt_summary_group_by_user: when ON, the executing user is appended to the statement summary grouping key so the same digest run by different users produces separate rows; flipping the flag clears existing data. The user is exposed as the USER column in INFORMATION_SCHEMA.STATEMENTS_SUMMARY. Off by default to preserve existing cardinality. Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/infoschema/tables.go | 2 + pkg/sessionctx/vardef/tidb_vars.go | 6 ++ pkg/sessionctx/variable/sysvar.go | 4 ++ pkg/util/stmtsummary/BUILD.bazel | 2 +- pkg/util/stmtsummary/evicted_test.go | 4 +- pkg/util/stmtsummary/reader.go | 4 ++ pkg/util/stmtsummary/statement_summary.go | 37 +++++++++- .../stmtsummary/statement_summary_test.go | 69 +++++++++++++++---- pkg/util/stmtsummary/v2/BUILD.bazel | 2 +- pkg/util/stmtsummary/v2/column.go | 4 ++ pkg/util/stmtsummary/v2/record.go | 4 ++ pkg/util/stmtsummary/v2/stmtsummary.go | 43 +++++++++++- pkg/util/stmtsummary/v2/stmtsummary_test.go | 48 +++++++++++++ 13 files changed, 208 insertions(+), 21 deletions(-) diff --git a/pkg/infoschema/tables.go b/pkg/infoschema/tables.go index d610f0ad9e54b..0a305dcbe542d 100644 --- a/pkg/infoschema/tables.go +++ b/pkg/infoschema/tables.go @@ -1326,6 +1326,7 @@ var tableStatementsSummaryCols = []columnInfo{ {name: stmtsummary.TableNamesStr, tp: mysql.TypeBlob, size: types.UnspecifiedLength, comment: "Involved tables"}, {name: stmtsummary.IndexNamesStr, tp: mysql.TypeBlob, size: types.UnspecifiedLength, comment: "Used indices"}, {name: stmtsummary.SampleUserStr, tp: mysql.TypeVarchar, size: 64, comment: "Sampled user who executed these statements"}, + {name: stmtsummary.UserStr, tp: mysql.TypeVarchar, size: 64, comment: "User that groups this summary when tidb_stmt_summary_group_by_user is ON"}, {name: stmtsummary.ExecCountStr, tp: mysql.TypeLonglong, size: 20, flag: mysql.NotNullFlag | mysql.UnsignedFlag, comment: "Count of executions"}, {name: stmtsummary.SumErrorsStr, tp: mysql.TypeLong, size: 11, flag: mysql.NotNullFlag | mysql.UnsignedFlag, comment: "Sum of errors"}, {name: stmtsummary.SumWarningsStr, tp: mysql.TypeLong, size: 11, flag: mysql.NotNullFlag | mysql.UnsignedFlag, comment: "Sum of warnings"}, @@ -1447,6 +1448,7 @@ var tableTiDBStatementsStatsCols = []columnInfo{ {name: stmtsummary.TableNamesStr, tp: mysql.TypeBlob, size: types.UnspecifiedLength, comment: "Involved tables"}, {name: stmtsummary.IndexNamesStr, tp: mysql.TypeBlob, size: types.UnspecifiedLength, comment: "Used indices"}, {name: stmtsummary.SampleUserStr, tp: mysql.TypeVarchar, size: 64, comment: "Sampled user who executed these statements"}, + {name: stmtsummary.UserStr, tp: mysql.TypeVarchar, size: 64, comment: "User that groups this summary when tidb_stmt_summary_group_by_user is ON"}, {name: stmtsummary.ExecCountStr, tp: mysql.TypeLonglong, size: 20, flag: mysql.NotNullFlag | mysql.UnsignedFlag, comment: "Count of executions"}, {name: stmtsummary.ErrorsStr, tp: mysql.TypeLong, size: 11, flag: mysql.NotNullFlag | mysql.UnsignedFlag, comment: "Sum of errors"}, {name: stmtsummary.WarningsStr, tp: mysql.TypeLong, size: 11, flag: mysql.NotNullFlag | mysql.UnsignedFlag, comment: "Sum of warnings"}, diff --git a/pkg/sessionctx/vardef/tidb_vars.go b/pkg/sessionctx/vardef/tidb_vars.go index 478b27f0d6e99..55b6d004a3c63 100644 --- a/pkg/sessionctx/vardef/tidb_vars.go +++ b/pkg/sessionctx/vardef/tidb_vars.go @@ -704,6 +704,11 @@ const ( // TiDBStmtSummaryMaxSQLLength indicates the max length of displayed normalized sql and sample sql. TiDBStmtSummaryMaxSQLLength = "tidb_stmt_summary_max_sql_length" + // TiDBStmtSummaryGroupByUser, when enabled, adds the executing user to the + // statement summary grouping key so the same digest run by different users + // produces separate rows. Off by default to avoid cardinality growth. + TiDBStmtSummaryGroupByUser = "tidb_stmt_summary_group_by_user" + // TiDBIgnoreInlistPlanDigest enables TiDB to generate the same plan digest with SQL using different in-list arguments. TiDBIgnoreInlistPlanDigest = "tidb_ignore_inlist_plan_digest" @@ -1588,6 +1593,7 @@ const ( DefTiDBStmtSummaryHistorySize = 24 DefTiDBStmtSummaryMaxStmtCount = 3000 DefTiDBStmtSummaryMaxSQLLength = 32768 + DefTiDBStmtSummaryGroupByUser = false DefTiDBCapturePlanBaseline = Off DefTiDBIgnoreInlistPlanDigest = true DefTiDBEnableIndexMerge = true diff --git a/pkg/sessionctx/variable/sysvar.go b/pkg/sessionctx/variable/sysvar.go index aff7f44916781..3735201d46e53 100644 --- a/pkg/sessionctx/variable/sysvar.go +++ b/pkg/sessionctx/variable/sysvar.go @@ -942,6 +942,10 @@ var defaultSysVars = []*SysVar{ SetGlobal: func(_ context.Context, s *SessionVars, val string) error { return stmtsummaryv2.SetMaxSQLLength(TidbOptInt(val, vardef.DefTiDBStmtSummaryMaxSQLLength)) }}, + {Scope: vardef.ScopeGlobal, Name: vardef.TiDBStmtSummaryGroupByUser, Value: BoolToOnOff(vardef.DefTiDBStmtSummaryGroupByUser), Type: vardef.TypeBool, AllowEmpty: true, + SetGlobal: func(_ context.Context, s *SessionVars, val string) error { + return stmtsummaryv2.SetGroupByUser(TiDBOptOn(val)) + }}, {Scope: vardef.ScopeGlobal, Name: vardef.TiDBCapturePlanBaseline, Value: vardef.DefTiDBCapturePlanBaseline, Type: vardef.TypeBool, AllowEmptyAll: true}, {Scope: vardef.ScopeGlobal, Name: vardef.TiDBEvolvePlanTaskMaxTime, Value: strconv.Itoa(vardef.DefTiDBEvolvePlanTaskMaxTime), Type: vardef.TypeInt, MinValue: -1, MaxValue: math.MaxInt64}, {Scope: vardef.ScopeGlobal, Name: vardef.TiDBEvolvePlanTaskStartTime, Value: vardef.DefTiDBEvolvePlanTaskStartTime, Type: vardef.TypeTime}, diff --git a/pkg/util/stmtsummary/BUILD.bazel b/pkg/util/stmtsummary/BUILD.bazel index aa3d6be872b5a..3d7a6fffe62ed 100644 --- a/pkg/util/stmtsummary/BUILD.bazel +++ b/pkg/util/stmtsummary/BUILD.bazel @@ -39,7 +39,7 @@ go_test( ], embed = [":stmtsummary"], flaky = True, - shard_count = 24, + shard_count = 25, deps = [ "//pkg/meta/model", "//pkg/parser/ast", diff --git a/pkg/util/stmtsummary/evicted_test.go b/pkg/util/stmtsummary/evicted_test.go index 54e572efb4911..73c183efcb7c8 100644 --- a/pkg/util/stmtsummary/evicted_test.go +++ b/pkg/util/stmtsummary/evicted_test.go @@ -52,7 +52,7 @@ func newInduceSsbde(beginTime int64, endTime int64) *stmtSummaryByDigestElement // generate new StmtDigestKey and stmtSummaryByDigest func generateStmtSummaryByDigestKeyValue(schema string, beginTime int64, endTime int64) (*StmtDigestKey, *stmtSummaryByDigest) { key := &StmtDigestKey{} - key.Init(schema, "", "", "", "") + key.Init(schema, "", "", "", "", "") value := newInduceSsbd(beginTime, endTime) return key, value } @@ -191,7 +191,7 @@ func TestSimpleStmtSummaryByDigestEvicted(t *testing.T) { require.Equal(t, "{begin: 8, end: 9, count: 1}, {begin: 5, end: 6, count: 1}, {begin: 2, end: 3, count: 1}", getAllEvicted(ssbde)) evictedKey = &StmtDigestKey{} - evictedKey.Init("b", "", "", "", "") + evictedKey.Init("b", "", "", "", "", "") ssbde.AddEvicted(evictedKey, evictedValue, 4) require.Equal(t, "{begin: 8, end: 9, count: 2}, {begin: 5, end: 6, count: 2}, {begin: 2, end: 3, count: 2}, {begin: 1, end: 2, count: 1}", getAllEvicted(ssbde)) diff --git a/pkg/util/stmtsummary/reader.go b/pkg/util/stmtsummary/reader.go index 00b13473f0f4e..57dfbfa9cc795 100644 --- a/pkg/util/stmtsummary/reader.go +++ b/pkg/util/stmtsummary/reader.go @@ -266,6 +266,7 @@ const ( TableNamesStr = "TABLE_NAMES" IndexNamesStr = "INDEX_NAMES" SampleUserStr = "SAMPLE_USER" + UserStr = "USER" ExecCountStr = "EXEC_COUNT" SumErrorsStr = "SUM_ERRORS" SumWarningsStr = "SUM_WARNINGS" @@ -487,6 +488,9 @@ var columnValueFactoryMap = map[string]columnValueFactory{ } return convertEmptyToNil(sampleUser) }, + UserStr: func(_ *stmtSummaryReader, _ *stmtSummaryByDigestElement, ssbd *stmtSummaryByDigest, _ *stmtSummaryStats) any { + return convertEmptyToNil(ssbd.user) + }, ExecCountStr: func(_ *stmtSummaryReader, _ *stmtSummaryByDigestElement, _ *stmtSummaryByDigest, ssStats *stmtSummaryStats) any { return ssStats.execCount }, diff --git a/pkg/util/stmtsummary/statement_summary.go b/pkg/util/stmtsummary/statement_summary.go index bccd1d5d8faed..4c6b414cde41c 100644 --- a/pkg/util/stmtsummary/statement_summary.go +++ b/pkg/util/stmtsummary/statement_summary.go @@ -52,8 +52,10 @@ type StmtDigestKey struct { } // Init initialize the hash key. -func (key *StmtDigestKey) Init(schemaName, digest, prevDigest, planDigest, resourceGroupName string) { - length := len(schemaName) + len(digest) + len(prevDigest) + len(planDigest) + len(resourceGroupName) +// When group_by_user is disabled, callers should pass an empty string for user, +// so the hash matches pre-user-dimension behavior. +func (key *StmtDigestKey) Init(schemaName, digest, prevDigest, planDigest, resourceGroupName, user string) { + length := len(schemaName) + len(digest) + len(prevDigest) + len(planDigest) + len(resourceGroupName) + len(user) if cap(key.hash) < length { key.hash = make([]byte, 0, length) } else { @@ -64,6 +66,7 @@ func (key *StmtDigestKey) Init(schemaName, digest, prevDigest, planDigest, resou key.hash = append(key.hash, hack.Slice(prevDigest)...) key.hash = append(key.hash, hack.Slice(planDigest)...) key.hash = append(key.hash, hack.Slice(resourceGroupName)...) + key.hash = append(key.hash, hack.Slice(user)...) } // Hash implements SimpleLRUCache.Key. @@ -89,6 +92,7 @@ type stmtSummaryByDigestMap struct { optRefreshInterval *atomic2.Int64 optHistorySize *atomic2.Int32 optMaxSQLLength *atomic2.Int32 + optGroupByUser *atomic2.Bool // other stores summary of evicted data. other *stmtSummaryByDigestEvicted @@ -117,6 +121,9 @@ type stmtSummaryByDigest struct { isInternal bool bindingSQL string bindingDigest string + // user is populated when group_by_user is enabled at creation time. + // When disabled, it is empty and not part of the grouping key. + user string } // stmtSummaryByDigestElement is the summary for each type of statements in current interval. @@ -318,6 +325,7 @@ func newStmtSummaryByDigestMap() *stmtSummaryByDigestMap { optRefreshInterval: atomic2.NewInt64(1800), optHistorySize: atomic2.NewInt32(24), optMaxSQLLength: atomic2.NewInt32(32768), + optGroupByUser: atomic2.NewBool(false), other: ssbde, } newSsMap.summaryMap.SetOnEvict(func(k kvcache.Key, v kvcache.Value) { @@ -349,9 +357,14 @@ func (ssMap *stmtSummaryByDigestMap) AddStatement(sei *StmtExecInfo) { historySize = ssMap.historySize() } + userForKey := "" + if ssMap.optGroupByUser.Load() { + userForKey = sei.User + } + key := StmtDigestKeyPool.Get().(*StmtDigestKey) // Init hash value in advance, to reduce the time holding the lock. - key.Init(sei.SchemaName, sei.Digest, sei.PrevSQLDigest, sei.PlanDigest, sei.ResourceGroupName) + key.Init(sei.SchemaName, sei.Digest, sei.PrevSQLDigest, sei.PlanDigest, sei.ResourceGroupName, userForKey) var exist bool @@ -386,6 +399,7 @@ func (ssMap *stmtSummaryByDigestMap) AddStatement(sei *StmtExecInfo) { if !exist { // Lazy initialize it to release ssMap.mutex ASAP. summary = new(stmtSummaryByDigest) + summary.user = userForKey ssMap.summaryMap.Put(key, summary) } else { summary = value.(*stmtSummaryByDigest) @@ -508,6 +522,23 @@ func (ssMap *stmtSummaryByDigestMap) historySize() int { return int(ssMap.optHistorySize.Load()) } +// SetGroupByUser enables or disables grouping statement summaries by the +// executing user. Switching the flag clears existing data because existing +// rows were aggregated under a different grouping key. +func (ssMap *stmtSummaryByDigestMap) SetGroupByUser(value bool) error { + if ssMap.optGroupByUser.Load() == value { + return nil + } + ssMap.optGroupByUser.Store(value) + ssMap.Clear() + return nil +} + +// GroupByUser reports whether statement summaries are grouped by user. +func (ssMap *stmtSummaryByDigestMap) GroupByUser() bool { + return ssMap.optGroupByUser.Load() +} + // SetHistorySize sets the history size for all summaries. func (ssMap *stmtSummaryByDigestMap) SetMaxStmtCount(value uint) error { // `optMaxStmtCount` and `ssMap` don't need to be strictly atomically updated. diff --git a/pkg/util/stmtsummary/statement_summary_test.go b/pkg/util/stmtsummary/statement_summary_test.go index 66bf192cf2a6f..e8e0f2c49b780 100644 --- a/pkg/util/stmtsummary/statement_summary_test.go +++ b/pkg/util/stmtsummary/statement_summary_test.go @@ -75,7 +75,7 @@ func TestAddStatement(t *testing.T) { stmtExecInfo1 := generateAnyExecInfo() stmtExecInfo1.ExecDetail.CommitDetail.Mu.PrewriteBackoffTypes = make([]string, 0) key := &StmtDigestKey{} - key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, "", stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName) + key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, "", stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName, "") samplePlan, _, _ := stmtExecInfo1.LazyInfo.GetEncodedPlan() stmtExecInfo1.ExecDetail.CommitDetail.Mu.Lock() expectedSummaryElement := stmtSummaryByDigestElement{ @@ -485,7 +485,7 @@ func TestAddStatement(t *testing.T) { stmtExecInfo4.SchemaName = "schema2" stmtExecInfo4.ExecDetail.CommitDetail = nil key = &StmtDigestKey{} - key.Init(stmtExecInfo4.SchemaName, stmtExecInfo4.Digest, "", stmtExecInfo4.PlanDigest, stmtExecInfo4.ResourceGroupName) + key.Init(stmtExecInfo4.SchemaName, stmtExecInfo4.Digest, "", stmtExecInfo4.PlanDigest, stmtExecInfo4.ResourceGroupName, "") ssMap.AddStatement(stmtExecInfo4) require.Equal(t, 2, ssMap.summaryMap.Size()) _, ok = ssMap.summaryMap.Get(key) @@ -495,7 +495,7 @@ func TestAddStatement(t *testing.T) { stmtExecInfo5 := stmtExecInfo1 stmtExecInfo5.Digest = "digest2" key = &StmtDigestKey{} - key.Init(stmtExecInfo5.SchemaName, stmtExecInfo5.Digest, "", stmtExecInfo5.PlanDigest, stmtExecInfo5.ResourceGroupName) + key.Init(stmtExecInfo5.SchemaName, stmtExecInfo5.Digest, "", stmtExecInfo5.PlanDigest, stmtExecInfo5.ResourceGroupName, "") ssMap.AddStatement(stmtExecInfo5) require.Equal(t, 3, ssMap.summaryMap.Size()) _, ok = ssMap.summaryMap.Get(key) @@ -505,7 +505,7 @@ func TestAddStatement(t *testing.T) { stmtExecInfo6 := stmtExecInfo1 stmtExecInfo6.PlanDigest = "plan_digest2" key = &StmtDigestKey{} - key.Init(stmtExecInfo6.SchemaName, stmtExecInfo6.Digest, "", stmtExecInfo6.PlanDigest, stmtExecInfo6.ResourceGroupName) + key.Init(stmtExecInfo6.SchemaName, stmtExecInfo6.Digest, "", stmtExecInfo6.PlanDigest, stmtExecInfo6.ResourceGroupName, "") ssMap.AddStatement(stmtExecInfo6) require.Equal(t, 4, ssMap.summaryMap.Size()) _, ok = ssMap.summaryMap.Get(key) @@ -528,7 +528,7 @@ func TestAddStatement(t *testing.T) { bindingSQL: originalSQL, } key = &StmtDigestKey{} - key.Init(stmtExecInfo7.SchemaName, stmtExecInfo7.Digest, "", stmtExecInfo7.PlanDigest, stmtExecInfo7.ResourceGroupName) + key.Init(stmtExecInfo7.SchemaName, stmtExecInfo7.Digest, "", stmtExecInfo7.PlanDigest, stmtExecInfo7.ResourceGroupName, "") ssMap.AddStatement(stmtExecInfo7) require.Equal(t, 5, ssMap.summaryMap.Size()) v, ok := ssMap.summaryMap.Get(key) @@ -1100,7 +1100,7 @@ func TestMaxStmtCount(t *testing.T) { // LRU cache should work. for i := loops - 10; i < loops; i++ { key := &StmtDigestKey{} - key.Init(stmtExecInfo1.SchemaName, fmt.Sprintf("digest%d", i), "", stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName) + key.Init(stmtExecInfo1.SchemaName, fmt.Sprintf("digest%d", i), "", stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName, "") key.Hash() _, ok := sm.Get(key) require.True(t, ok) @@ -1144,7 +1144,7 @@ func TestMaxSQLLength(t *testing.T) { ssMap.AddStatement(stmtExecInfo1) key := &StmtDigestKey{} - key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, "", stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName) + key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, "", stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName, "") value, ok := ssMap.summaryMap.Get(key) require.True(t, ok) @@ -1337,7 +1337,7 @@ func TestRefreshCurrentSummary(t *testing.T) { ssMap.beginTimeForCurInterval = now + 10 stmtExecInfo1 := generateAnyExecInfo() key := &StmtDigestKey{} - key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, "", stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName) + key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, "", stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName, "") ssMap.AddStatement(stmtExecInfo1) require.Equal(t, 1, ssMap.summaryMap.Size()) value, ok := ssMap.summaryMap.Get(key) @@ -1384,7 +1384,7 @@ func TestSummaryHistory(t *testing.T) { stmtExecInfo1 := generateAnyExecInfo() key := &StmtDigestKey{} - key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, "", stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName) + key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, "", stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName, "") for i := range 11 { ssMap.beginTimeForCurInterval = now + int64(i+1)*10 ssMap.AddStatement(stmtExecInfo1) @@ -1453,7 +1453,7 @@ func TestPrevSQL(t *testing.T) { stmtExecInfo1.PrevSQLDigest = "prevSQLDigest" ssMap.AddStatement(stmtExecInfo1) key := &StmtDigestKey{} - key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, stmtExecInfo1.PrevSQLDigest, stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName) + key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, stmtExecInfo1.PrevSQLDigest, stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName, "") require.Equal(t, 1, ssMap.summaryMap.Size()) _, ok := ssMap.summaryMap.Get(key) require.True(t, ok) @@ -1468,7 +1468,7 @@ func TestPrevSQL(t *testing.T) { stmtExecInfo2.PrevSQLDigest = "prevSQLDigest1" ssMap.AddStatement(stmtExecInfo2) require.Equal(t, 2, ssMap.summaryMap.Size()) - key.Init(stmtExecInfo2.SchemaName, stmtExecInfo2.Digest, stmtExecInfo2.PrevSQLDigest, stmtExecInfo2.PlanDigest, stmtExecInfo2.ResourceGroupName) + key.Init(stmtExecInfo2.SchemaName, stmtExecInfo2.Digest, stmtExecInfo2.PrevSQLDigest, stmtExecInfo2.PlanDigest, stmtExecInfo2.ResourceGroupName, "") _, ok = ssMap.summaryMap.Get(key) require.True(t, ok) } @@ -1481,7 +1481,7 @@ func TestEndTime(t *testing.T) { stmtExecInfo1 := generateAnyExecInfo() ssMap.AddStatement(stmtExecInfo1) key := &StmtDigestKey{} - key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, "", stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName) + key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, "", stmtExecInfo1.PlanDigest, stmtExecInfo1.ResourceGroupName, "") require.Equal(t, 1, ssMap.summaryMap.Size()) value, ok := ssMap.summaryMap.Get(key) require.True(t, ok) @@ -1527,7 +1527,7 @@ func TestPointGet(t *testing.T) { stmtExecInfo1.LazyInfo.(*mockLazyInfo).plan = fakePlanDigestGenerator() ssMap.AddStatement(stmtExecInfo1) key := &StmtDigestKey{} - key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, "", "", stmtExecInfo1.ResourceGroupName) + key.Init(stmtExecInfo1.SchemaName, stmtExecInfo1.Digest, "", "", stmtExecInfo1.ResourceGroupName, "") require.Equal(t, 1, ssMap.summaryMap.Size()) value, ok := ssMap.summaryMap.Get(key) require.True(t, ok) @@ -1606,3 +1606,46 @@ func TestAccessPrivilege(t *testing.T) { datums = reader.GetStmtSummaryCurrentRows() require.Len(t, datums, loops) } + +// TestAddStatementGroupByUser verifies that flipping the group-by-user flag +// splits the same digest into per-user rows and fills ssbd.user. The default +// (flag OFF) keeps legacy behavior: one row per digest regardless of user. +func TestAddStatementGroupByUser(t *testing.T) { + ssMap := newStmtSummaryByDigestMap() + + info1 := generateAnyExecInfo() + info1.User = "alice" + info2 := generateAnyExecInfo() + info2.User = "bob" + + // Flag off: both statements collapse into one record. + ssMap.AddStatement(info1) + ssMap.AddStatement(info2) + require.Equal(t, 1, ssMap.summaryMap.Size()) + + // Flipping the flag clears prior data (different grouping key). + require.NoError(t, ssMap.SetGroupByUser(true)) + require.Equal(t, 0, ssMap.summaryMap.Size()) + + ssMap.AddStatement(info1) + ssMap.AddStatement(info2) + ssMap.AddStatement(info1) + require.Equal(t, 2, ssMap.summaryMap.Size()) + + seen := map[string]bool{} + for _, v := range ssMap.summaryMap.Values() { + ssbd := v.(*stmtSummaryByDigest) + seen[ssbd.user] = true + } + require.True(t, seen["alice"]) + require.True(t, seen["bob"]) + + // Flipping back off clears again, and new records keep user empty. + require.NoError(t, ssMap.SetGroupByUser(false)) + require.Equal(t, 0, ssMap.summaryMap.Size()) + ssMap.AddStatement(info1) + require.Equal(t, 1, ssMap.summaryMap.Size()) + for _, v := range ssMap.summaryMap.Values() { + require.Empty(t, v.(*stmtSummaryByDigest).user) + } +} diff --git a/pkg/util/stmtsummary/v2/BUILD.bazel b/pkg/util/stmtsummary/v2/BUILD.bazel index 8bc5284bfe3bf..131ef9cbcee76 100644 --- a/pkg/util/stmtsummary/v2/BUILD.bazel +++ b/pkg/util/stmtsummary/v2/BUILD.bazel @@ -48,7 +48,7 @@ go_test( ], embed = [":stmtsummary"], flaky = True, - shard_count = 13, + shard_count = 14, deps = [ "//pkg/meta/model", "//pkg/parser/ast", diff --git a/pkg/util/stmtsummary/v2/column.go b/pkg/util/stmtsummary/v2/column.go index 7853aae91e42c..a8637198e010f 100644 --- a/pkg/util/stmtsummary/v2/column.go +++ b/pkg/util/stmtsummary/v2/column.go @@ -42,6 +42,7 @@ const ( TableNamesStr = "TABLE_NAMES" IndexNamesStr = "INDEX_NAMES" SampleUserStr = "SAMPLE_USER" + UserStr = "USER" ExecCountStr = "EXEC_COUNT" SumErrorsStr = "SUM_ERRORS" SumWarningsStr = "SUM_WARNINGS" @@ -212,6 +213,9 @@ var columnFactoryMap = map[string]columnFactory{ } return convertEmptyToNil(sampleUser) }, + UserStr: func(_ columnInfo, record *StmtRecord) any { + return convertEmptyToNil(record.User) + }, ExecCountStr: func(_ columnInfo, record *StmtRecord) any { return record.ExecCount }, diff --git a/pkg/util/stmtsummary/v2/record.go b/pkg/util/stmtsummary/v2/record.go index 5fb6fbcbef003..c6bb6d6402370 100644 --- a/pkg/util/stmtsummary/v2/record.go +++ b/pkg/util/stmtsummary/v2/record.go @@ -50,6 +50,10 @@ type StmtRecord struct { IsInternal bool `json:"is_internal"` BindingSQL string `json:"binding_sql"` BindingDigest string `json:"binding_digest"` + // User is populated from StmtExecInfo.User at record creation when + // group_by_user is enabled; otherwise empty. Once set it never changes, + // because records with different users live in different grouping buckets. + User string `json:"user,omitempty"` // Basic SampleSQL string `json:"sample_sql"` Charset string `json:"charset"` diff --git a/pkg/util/stmtsummary/v2/stmtsummary.go b/pkg/util/stmtsummary/v2/stmtsummary.go index 654aa6b53a997..1c72ba138bd8e 100644 --- a/pkg/util/stmtsummary/v2/stmtsummary.go +++ b/pkg/util/stmtsummary/v2/stmtsummary.go @@ -85,6 +85,7 @@ type StmtSummary struct { optMaxStmtCount *atomic2.Uint32 optMaxSQLLength *atomic2.Uint32 optRefreshInterval *atomic2.Uint32 + optGroupByUser *atomic2.Bool window *stmtWindow windowLock sync.Mutex @@ -112,6 +113,7 @@ func NewStmtSummary(cfg *Config) (*StmtSummary, error) { optMaxStmtCount: atomic2.NewUint32(defaultMaxStmtCount), optMaxSQLLength: atomic2.NewUint32(defaultMaxSQLLength), optRefreshInterval: atomic2.NewUint32(defaultRefreshInterval), + optGroupByUser: atomic2.NewBool(false), window: newStmtWindow(timeNow(), uint(defaultMaxStmtCount)), storage: newStmtLogStorage(&log.Config{ File: log.FileLogConfig{ @@ -145,6 +147,7 @@ func NewStmtSummary4Test(maxStmtCount uint) *StmtSummary { optMaxStmtCount: atomic2.NewUint32(defaultMaxStmtCount), optMaxSQLLength: atomic2.NewUint32(defaultMaxSQLLength), optRefreshInterval: atomic2.NewUint32(60 * 60 * 24 * 365), // 1 year + optGroupByUser: atomic2.NewBool(false), window: newStmtWindow(timeNow(), maxStmtCount), storage: &mockStmtStorage{}, } @@ -234,6 +237,24 @@ func (s *StmtSummary) SetRefreshInterval(v uint32) error { return nil } +// GroupByUser reports whether statement summaries are grouped by the +// executing user in addition to the usual digest/schema/plan tuple. +func (s *StmtSummary) GroupByUser() bool { + return s.optGroupByUser.Load() +} + +// SetGroupByUser toggles user-dimension grouping. Switching the flag clears +// the in-memory window because existing records were aggregated under a +// different grouping key; persisted records are unaffected. +func (s *StmtSummary) SetGroupByUser(v bool) error { + if s.optGroupByUser.Load() == v { + return nil + } + s.optGroupByUser.Store(v) + s.Clear() + return nil +} + // Add adds a single stmtsummary.StmtExecInfo to the current statistics window // of StmtSummary. Before adding, it will check whether the current window has // expired, and if it has expired, the window will be persisted asynchronously @@ -243,9 +264,14 @@ func (s *StmtSummary) Add(info *stmtsummary.StmtExecInfo) { return } + userForKey := "" + if s.optGroupByUser.Load() { + userForKey = info.User + } + k := stmtsummary.StmtDigestKeyPool.Get().(*stmtsummary.StmtDigestKey) // Init hash value in advance, to reduce the time holding the lock. - k.Init(info.SchemaName, info.Digest, info.PrevSQLDigest, info.PlanDigest, info.ResourceGroupName) + k.Init(info.SchemaName, info.Digest, info.PrevSQLDigest, info.PlanDigest, info.ResourceGroupName, userForKey) // Add info to the current statistics window. s.windowLock.Lock() @@ -255,6 +281,9 @@ func (s *StmtSummary) Add(info *stmtsummary.StmtExecInfo) { record = v.(*lockedStmtRecord) } else { record = &lockedStmtRecord{StmtRecord: NewStmtRecord(info)} + // Populate User only when user is part of the grouping key; this + // keeps "" in records created before the flag was flipped. + record.User = userForKey s.window.lru.Put(k, record) } s.windowLock.Unlock() @@ -529,3 +558,15 @@ func SetMaxSQLLength(v int) error { } return stmtsummary.StmtSummaryByDigestMap.SetMaxSQLLength(v) } + +// SetGroupByUser toggles the user dimension on both v1 and v2 so the sysvar +// setter can call one entry point regardless of which backend is active. +func SetGroupByUser(v bool) error { + if err := stmtsummary.StmtSummaryByDigestMap.SetGroupByUser(v); err != nil { + return err + } + if GlobalStmtSummary != nil { + return GlobalStmtSummary.SetGroupByUser(v) + } + return nil +} diff --git a/pkg/util/stmtsummary/v2/stmtsummary_test.go b/pkg/util/stmtsummary/v2/stmtsummary_test.go index 3256e6d0af960..594aabfe07a7d 100644 --- a/pkg/util/stmtsummary/v2/stmtsummary_test.go +++ b/pkg/util/stmtsummary/v2/stmtsummary_test.go @@ -18,6 +18,7 @@ import ( "encoding/json" "testing" + "github.com/pingcap/tidb/pkg/util/stmtsummary" "github.com/stretchr/testify/require" ) @@ -69,6 +70,53 @@ func TestStmtSummary(t *testing.T) { require.Equal(t, 0, w.lru.Size()) } +func TestStmtSummaryGroupByUser(t *testing.T) { + ss := NewStmtSummary4Test(100) + defer ss.Close() + + // Two statements, same digest, different users: without the flag they + // should merge into one record. + ss.Add(stmtExecInfoWithUser("digest1", "alice")) + ss.Add(stmtExecInfoWithUser("digest1", "bob")) + require.Equal(t, 1, ss.window.lru.Size()) + + // Switching the flag on clears the window. Re-emitting produces two rows. + require.NoError(t, ss.SetGroupByUser(true)) + require.Equal(t, 0, ss.window.lru.Size()) + ss.Add(stmtExecInfoWithUser("digest1", "alice")) + ss.Add(stmtExecInfoWithUser("digest1", "bob")) + ss.Add(stmtExecInfoWithUser("digest1", "alice")) + require.Equal(t, 2, ss.window.lru.Size()) + + // Each record should remember the User that groups it so the USER column + // can be emitted without scanning AuthUsers. + users := map[string]int64{} + for _, v := range ss.window.lru.Values() { + r := v.(*lockedStmtRecord) + users[r.User] = r.ExecCount + } + require.Equal(t, int64(2), users["alice"]) + require.Equal(t, int64(1), users["bob"]) + + // Turning the flag off again clears and reverts to single-record merging. + require.NoError(t, ss.SetGroupByUser(false)) + ss.Add(stmtExecInfoWithUser("digest1", "alice")) + ss.Add(stmtExecInfoWithUser("digest1", "bob")) + require.Equal(t, 1, ss.window.lru.Size()) + for _, v := range ss.window.lru.Values() { + r := v.(*lockedStmtRecord) + require.Empty(t, r.User) // group_by_user OFF leaves User empty + } +} + +// stmtExecInfoWithUser returns a StmtExecInfo whose digest and User fields are +// set; everything else is the generic test fixture. +func stmtExecInfoWithUser(digest, user string) *stmtsummary.StmtExecInfo { + info := GenerateStmtExecInfo4Test(digest) + info.User = user + return info +} + func TestStmtSummaryFlush(t *testing.T) { storage := &mockStmtStorage{} ss := NewStmtSummary4Test(1000) From e39649623628a8527f6354e518f16433f0b8e79f Mon Sep 17 00:00:00 2001 From: nolouch Date: Wed, 20 May 2026 16:35:00 -0700 Subject: [PATCH 2/6] util/stmtsummary: fix group_by_user race and reuse SAMPLE_USER Address CodeRabbit review on #68512: - Hold the digest-map / window lock across SetGroupByUser's flag flip and clear, and read the flag while holding that same lock in AddStatement/Add. Previously the flag was flipped outside the lock, so an in-flight insert could land under the old grouping mode after Clear() completed and mix key modes within one window. - Drop the dedicated USER column and record.user/summary.user fields. When group_by_user is ON, info.User is part of the grouping key, so each row's AuthUsers set already contains exactly the grouped user; SAMPLE_USER reflects the grouping dimension naturally without a new column. Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/infoschema/tables.go | 2 -- pkg/util/stmtsummary/reader.go | 4 --- pkg/util/stmtsummary/statement_summary.go | 31 ++++++++++++------- .../stmtsummary/statement_summary_test.go | 16 ++++++++-- pkg/util/stmtsummary/v2/column.go | 4 --- pkg/util/stmtsummary/v2/record.go | 4 --- pkg/util/stmtsummary/v2/stmtsummary.go | 23 ++++++++------ pkg/util/stmtsummary/v2/stmtsummary_test.go | 12 ++++--- 8 files changed, 53 insertions(+), 43 deletions(-) diff --git a/pkg/infoschema/tables.go b/pkg/infoschema/tables.go index 74078d4233c3c..3ce9897994e71 100644 --- a/pkg/infoschema/tables.go +++ b/pkg/infoschema/tables.go @@ -1329,7 +1329,6 @@ var tableStatementsSummaryCols = []columnInfo{ {name: stmtsummary.TableNamesStr, tp: mysql.TypeBlob, size: types.UnspecifiedLength, comment: "Involved tables"}, {name: stmtsummary.IndexNamesStr, tp: mysql.TypeBlob, size: types.UnspecifiedLength, comment: "Used indices"}, {name: stmtsummary.SampleUserStr, tp: mysql.TypeVarchar, size: 64, comment: "Sampled user who executed these statements"}, - {name: stmtsummary.UserStr, tp: mysql.TypeVarchar, size: 64, comment: "User that groups this summary when tidb_stmt_summary_group_by_user is ON"}, {name: stmtsummary.ExecCountStr, tp: mysql.TypeLonglong, size: 20, flag: mysql.NotNullFlag | mysql.UnsignedFlag, comment: "Count of executions"}, {name: stmtsummary.SumErrorsStr, tp: mysql.TypeLong, size: 11, flag: mysql.NotNullFlag | mysql.UnsignedFlag, comment: "Sum of errors"}, {name: stmtsummary.SumWarningsStr, tp: mysql.TypeLong, size: 11, flag: mysql.NotNullFlag | mysql.UnsignedFlag, comment: "Sum of warnings"}, @@ -1453,7 +1452,6 @@ var tableTiDBStatementsStatsCols = []columnInfo{ {name: stmtsummary.TableNamesStr, tp: mysql.TypeBlob, size: types.UnspecifiedLength, comment: "Involved tables"}, {name: stmtsummary.IndexNamesStr, tp: mysql.TypeBlob, size: types.UnspecifiedLength, comment: "Used indices"}, {name: stmtsummary.SampleUserStr, tp: mysql.TypeVarchar, size: 64, comment: "Sampled user who executed these statements"}, - {name: stmtsummary.UserStr, tp: mysql.TypeVarchar, size: 64, comment: "User that groups this summary when tidb_stmt_summary_group_by_user is ON"}, {name: stmtsummary.ExecCountStr, tp: mysql.TypeLonglong, size: 20, flag: mysql.NotNullFlag | mysql.UnsignedFlag, comment: "Count of executions"}, {name: stmtsummary.ErrorsStr, tp: mysql.TypeLong, size: 11, flag: mysql.NotNullFlag | mysql.UnsignedFlag, comment: "Sum of errors"}, {name: stmtsummary.WarningsStr, tp: mysql.TypeLong, size: 11, flag: mysql.NotNullFlag | mysql.UnsignedFlag, comment: "Sum of warnings"}, diff --git a/pkg/util/stmtsummary/reader.go b/pkg/util/stmtsummary/reader.go index b7169759be00d..90edc89cbbb33 100644 --- a/pkg/util/stmtsummary/reader.go +++ b/pkg/util/stmtsummary/reader.go @@ -266,7 +266,6 @@ const ( TableNamesStr = "TABLE_NAMES" IndexNamesStr = "INDEX_NAMES" SampleUserStr = "SAMPLE_USER" - UserStr = "USER" ExecCountStr = "EXEC_COUNT" SumErrorsStr = "SUM_ERRORS" SumWarningsStr = "SUM_WARNINGS" @@ -490,9 +489,6 @@ var columnValueFactoryMap = map[string]columnValueFactory{ } return convertEmptyToNil(sampleUser) }, - UserStr: func(_ *stmtSummaryReader, _ *stmtSummaryByDigestElement, ssbd *stmtSummaryByDigest, _ *stmtSummaryStats) any { - return convertEmptyToNil(ssbd.user) - }, ExecCountStr: func(_ *stmtSummaryReader, _ *stmtSummaryByDigestElement, _ *stmtSummaryByDigest, ssStats *stmtSummaryStats) any { return ssStats.execCount }, diff --git a/pkg/util/stmtsummary/statement_summary.go b/pkg/util/stmtsummary/statement_summary.go index ab4b156793bad..76e022c6e6fbd 100644 --- a/pkg/util/stmtsummary/statement_summary.go +++ b/pkg/util/stmtsummary/statement_summary.go @@ -124,9 +124,6 @@ type stmtSummaryByDigest struct { isInternal bool bindingSQL string bindingDigest string - // user is populated when group_by_user is enabled at creation time. - // When disabled, it is empty and not part of the grouping key. - user string } // stmtSummaryByDigestElement is the summary for each type of statements in current interval. @@ -362,14 +359,7 @@ func (ssMap *stmtSummaryByDigestMap) AddStatement(sei *StmtExecInfo) { historySize = ssMap.historySize() } - userForKey := "" - if ssMap.optGroupByUser.Load() { - userForKey = sei.User - } - key := StmtDigestKeyPool.Get().(*StmtDigestKey) - // Init hash value in advance, to reduce the time holding the lock. - key.Init(sei.SchemaName, sei.Digest, sei.PrevSQLDigest, sei.PlanDigest, sei.ResourceGroupName, userForKey) var exist bool @@ -383,6 +373,15 @@ func (ssMap *stmtSummaryByDigestMap) AddStatement(sei *StmtExecInfo) { ssMap.Lock() defer ssMap.Unlock() + // Decide userForKey under the lock so SetGroupByUser's flag flip + Clear + // is atomic w.r.t. AddStatement; otherwise a post-clear insert could land + // under the wrong grouping mode. + userForKey := "" + if ssMap.optGroupByUser.Load() { + userForKey = sei.User + } + key.Init(sei.SchemaName, sei.Digest, sei.PrevSQLDigest, sei.PlanDigest, sei.ResourceGroupName, userForKey) + // Check again. Statements could be added before disabling the flag and after Clear(). if !ssMap.Enabled() { return @@ -405,7 +404,6 @@ func (ssMap *stmtSummaryByDigestMap) AddStatement(sei *StmtExecInfo) { if !exist { // Lazy initialize it to release ssMap.mutex ASAP. summary = new(stmtSummaryByDigest) - summary.user = userForKey ssMap.summaryMap.Put(key, summary) } else { summary = value.(*stmtSummaryByDigest) @@ -536,11 +534,20 @@ func (ssMap *stmtSummaryByDigestMap) historySize() int { // executing user. Switching the flag clears existing data because existing // rows were aggregated under a different grouping key. func (ssMap *stmtSummaryByDigestMap) SetGroupByUser(value bool) error { + // Hold ssMap.Lock across the flag flip and clear so AddStatement (which + // reads the flag under the same lock) cannot insert a record with the + // old grouping mode after Clear() completes. + ssMap.Lock() + defer ssMap.Unlock() if ssMap.optGroupByUser.Load() == value { return nil } ssMap.optGroupByUser.Store(value) - ssMap.Clear() + ssMap.summaryMap.DeleteAll() + ssMap.other.Clear() + ssMap.beginTimeForCurInterval = 0 + ssMap.currentWindowEvictedCount = 0 + ssMap.updateMetricsLocked() return nil } diff --git a/pkg/util/stmtsummary/statement_summary_test.go b/pkg/util/stmtsummary/statement_summary_test.go index 185590191c464..3f74bb4e8992c 100644 --- a/pkg/util/stmtsummary/statement_summary_test.go +++ b/pkg/util/stmtsummary/statement_summary_test.go @@ -1713,20 +1713,30 @@ func TestAddStatementGroupByUser(t *testing.T) { ssMap.AddStatement(info1) require.Equal(t, 2, ssMap.summaryMap.Size()) + // With grouping ON, each record's authUsers must hold exactly one user — + // the one that groups it — so SAMPLE_USER naturally reflects the grouping + // dimension without a dedicated column. seen := map[string]bool{} for _, v := range ssMap.summaryMap.Values() { ssbd := v.(*stmtSummaryByDigest) - seen[ssbd.user] = true + elem := ssbd.history.Front().Value.(*stmtSummaryByDigestElement) + require.Len(t, elem.authUsers, 1) + for u := range elem.authUsers { + seen[u] = true + } } require.True(t, seen["alice"]) require.True(t, seen["bob"]) - // Flipping back off clears again, and new records keep user empty. + // Flipping back off clears again, and re-emitted records merge users. require.NoError(t, ssMap.SetGroupByUser(false)) require.Equal(t, 0, ssMap.summaryMap.Size()) ssMap.AddStatement(info1) + ssMap.AddStatement(info2) require.Equal(t, 1, ssMap.summaryMap.Size()) for _, v := range ssMap.summaryMap.Values() { - require.Empty(t, v.(*stmtSummaryByDigest).user) + ssbd := v.(*stmtSummaryByDigest) + elem := ssbd.history.Front().Value.(*stmtSummaryByDigestElement) + require.Len(t, elem.authUsers, 2) } } diff --git a/pkg/util/stmtsummary/v2/column.go b/pkg/util/stmtsummary/v2/column.go index e2404a1a9ec6c..68ec4e170a88e 100644 --- a/pkg/util/stmtsummary/v2/column.go +++ b/pkg/util/stmtsummary/v2/column.go @@ -42,7 +42,6 @@ const ( TableNamesStr = "TABLE_NAMES" IndexNamesStr = "INDEX_NAMES" SampleUserStr = "SAMPLE_USER" - UserStr = "USER" ExecCountStr = "EXEC_COUNT" SumErrorsStr = "SUM_ERRORS" SumWarningsStr = "SUM_WARNINGS" @@ -215,9 +214,6 @@ var columnFactoryMap = map[string]columnFactory{ } return convertEmptyToNil(sampleUser) }, - UserStr: func(_ columnInfo, record *StmtRecord) any { - return convertEmptyToNil(record.User) - }, ExecCountStr: func(_ columnInfo, record *StmtRecord) any { return record.ExecCount }, diff --git a/pkg/util/stmtsummary/v2/record.go b/pkg/util/stmtsummary/v2/record.go index 6ab7cd5f710bc..e7fa2d38900ac 100644 --- a/pkg/util/stmtsummary/v2/record.go +++ b/pkg/util/stmtsummary/v2/record.go @@ -50,10 +50,6 @@ type StmtRecord struct { IsInternal bool `json:"is_internal"` BindingSQL string `json:"binding_sql"` BindingDigest string `json:"binding_digest"` - // User is populated from StmtExecInfo.User at record creation when - // group_by_user is enabled; otherwise empty. Once set it never changes, - // because records with different users live in different grouping buckets. - User string `json:"user,omitempty"` // Basic SampleSQL string `json:"sample_sql"` Charset string `json:"charset"` diff --git a/pkg/util/stmtsummary/v2/stmtsummary.go b/pkg/util/stmtsummary/v2/stmtsummary.go index 207bda9b745d5..8eec704ea7d91 100644 --- a/pkg/util/stmtsummary/v2/stmtsummary.go +++ b/pkg/util/stmtsummary/v2/stmtsummary.go @@ -248,11 +248,16 @@ func (s *StmtSummary) GroupByUser() bool { // the in-memory window because existing records were aggregated under a // different grouping key; persisted records are unaffected. func (s *StmtSummary) SetGroupByUser(v bool) error { + // Hold windowLock across the flag flip and clear so Add (which reads + // the flag under the same lock) cannot insert a record with the old + // grouping mode after the window is cleared. + s.windowLock.Lock() + defer s.windowLock.Unlock() if s.optGroupByUser.Load() == v { return nil } s.optGroupByUser.Store(v) - s.Clear() + s.window.clear() return nil } @@ -265,26 +270,24 @@ func (s *StmtSummary) Add(info *stmtsummary.StmtExecInfo) { return } + k := stmtsummary.StmtDigestKeyPool.Get().(*stmtsummary.StmtDigestKey) + + // Add info to the current statistics window. + s.windowLock.Lock() + // Decide userForKey under windowLock so SetGroupByUser's flag flip + clear + // is atomic w.r.t. Add; otherwise a post-clear insert could land under the + // wrong grouping mode. userForKey := "" if s.optGroupByUser.Load() { userForKey = info.User } - - k := stmtsummary.StmtDigestKeyPool.Get().(*stmtsummary.StmtDigestKey) - // Init hash value in advance, to reduce the time holding the lock. k.Init(info.SchemaName, info.Digest, info.PrevSQLDigest, info.PlanDigest, info.ResourceGroupName, userForKey) - - // Add info to the current statistics window. - s.windowLock.Lock() var record *lockedStmtRecord v, exist := s.window.lru.Get(k) if exist { record = v.(*lockedStmtRecord) } else { record = &lockedStmtRecord{StmtRecord: NewStmtRecord(info)} - // Populate User only when user is part of the grouping key; this - // keeps "" in records created before the flag was flipped. - record.User = userForKey s.window.lru.Put(k, record) } s.windowLock.Unlock() diff --git a/pkg/util/stmtsummary/v2/stmtsummary_test.go b/pkg/util/stmtsummary/v2/stmtsummary_test.go index 09d9902b56fd8..4d48dd30474d6 100644 --- a/pkg/util/stmtsummary/v2/stmtsummary_test.go +++ b/pkg/util/stmtsummary/v2/stmtsummary_test.go @@ -101,12 +101,16 @@ func TestStmtSummaryGroupByUser(t *testing.T) { ss.Add(stmtExecInfoWithUser("digest1", "alice")) require.Equal(t, 2, ss.window.lru.Size()) - // Each record should remember the User that groups it so the USER column - // can be emitted without scanning AuthUsers. + // When grouping by user, each record's AuthUsers must hold exactly one + // user — the one that groups it — so SAMPLE_USER naturally reflects the + // grouping dimension without a dedicated column. users := map[string]int64{} for _, v := range ss.window.lru.Values() { r := v.(*lockedStmtRecord) - users[r.User] = r.ExecCount + require.Len(t, r.AuthUsers, 1) + for u := range r.AuthUsers { + users[u] = r.ExecCount + } } require.Equal(t, int64(2), users["alice"]) require.Equal(t, int64(1), users["bob"]) @@ -118,7 +122,7 @@ func TestStmtSummaryGroupByUser(t *testing.T) { require.Equal(t, 1, ss.window.lru.Size()) for _, v := range ss.window.lru.Values() { r := v.(*lockedStmtRecord) - require.Empty(t, r.User) // group_by_user OFF leaves User empty + require.Len(t, r.AuthUsers, 2) // both users merged when grouping is off } } From 03c144655cce5c7f9e105b6d126fc102a8068da9 Mon Sep 17 00:00:00 2001 From: nolouch Date: Wed, 20 May 2026 17:10:14 -0700 Subject: [PATCH 3/6] util/stmtsummary: encode boundary between resource_group and user in digest key Address CodeRabbit outside-diff finding on #68512: StmtDigestKey.Init appended resourceGroupName and user back-to-back, so pairs like ("rg", "alice") and ("rga", "lice") collide and silently merge across users when group_by_user is enabled. Prefix the user segment with a 4-byte big-endian length only when user is non-empty. With group_by_user OFF (user=""), the hash stays byte-identical to the previous 5-field encoding, preserving compatibility with in-memory records produced before this change. Add TestStmtDigestKeyBoundary covering both: the collision case and the OFF-mode byte-compat case. Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/util/stmtsummary/BUILD.bazel | 2 +- pkg/util/stmtsummary/statement_summary.go | 18 ++++++++++--- .../stmtsummary/statement_summary_test.go | 25 +++++++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/pkg/util/stmtsummary/BUILD.bazel b/pkg/util/stmtsummary/BUILD.bazel index d29bfe9e770c3..f98a374797061 100644 --- a/pkg/util/stmtsummary/BUILD.bazel +++ b/pkg/util/stmtsummary/BUILD.bazel @@ -40,7 +40,7 @@ go_test( ], embed = [":stmtsummary"], flaky = True, - shard_count = 27, + shard_count = 28, deps = [ "//pkg/meta/model", "//pkg/metrics", diff --git a/pkg/util/stmtsummary/statement_summary.go b/pkg/util/stmtsummary/statement_summary.go index 76e022c6e6fbd..0b08c6e4ad61e 100644 --- a/pkg/util/stmtsummary/statement_summary.go +++ b/pkg/util/stmtsummary/statement_summary.go @@ -18,6 +18,7 @@ import ( "bytes" "cmp" "container/list" + "encoding/binary" "fmt" "math" "slices" @@ -53,10 +54,16 @@ type StmtDigestKey struct { } // Init initialize the hash key. -// When group_by_user is disabled, callers should pass an empty string for user, -// so the hash matches pre-user-dimension behavior. +// When user is empty (group_by_user disabled), the hash is byte-identical to +// the pre-user-dimension layout. When user is non-empty, it is prefixed with +// a 4-byte length so the boundary between resourceGroupName and user is +// unambiguous and pairs like ("rg", "alice") and ("rga", "lice") cannot +// collide. func (key *StmtDigestKey) Init(schemaName, digest, prevDigest, planDigest, resourceGroupName, user string) { length := len(schemaName) + len(digest) + len(prevDigest) + len(planDigest) + len(resourceGroupName) + len(user) + if len(user) > 0 { + length += 4 + } if cap(key.hash) < length { key.hash = make([]byte, 0, length) } else { @@ -67,7 +74,12 @@ func (key *StmtDigestKey) Init(schemaName, digest, prevDigest, planDigest, resou key.hash = append(key.hash, hack.Slice(prevDigest)...) key.hash = append(key.hash, hack.Slice(planDigest)...) key.hash = append(key.hash, hack.Slice(resourceGroupName)...) - key.hash = append(key.hash, hack.Slice(user)...) + if len(user) > 0 { + var buf [4]byte + binary.BigEndian.PutUint32(buf[:], uint32(len(user))) + key.hash = append(key.hash, buf[:]...) + key.hash = append(key.hash, hack.Slice(user)...) + } } // Hash implements SimpleLRUCache.Key. diff --git a/pkg/util/stmtsummary/statement_summary_test.go b/pkg/util/stmtsummary/statement_summary_test.go index 3f74bb4e8992c..f8512a3301905 100644 --- a/pkg/util/stmtsummary/statement_summary_test.go +++ b/pkg/util/stmtsummary/statement_summary_test.go @@ -1740,3 +1740,28 @@ func TestAddStatementGroupByUser(t *testing.T) { require.Len(t, elem.authUsers, 2) } } + +// TestStmtDigestKeyBoundary guards against two regressions: +// 1. Adjacent string fields must not collide across boundary, e.g. +// (resourceGroupName, user) = ("rg", "alice") vs ("rga", "lice"); without +// a boundary marker on user, both produce the same hash. +// 2. With user empty (group_by_user OFF), the hash must stay byte-identical +// to the pre-user-dimension encoding so persisted/in-memory rows from +// older versions match. +func TestStmtDigestKeyBoundary(t *testing.T) { + k1 := &StmtDigestKey{} + k1.Init("schema", "digest", "prev", "plan", "rg", "alice") + k2 := &StmtDigestKey{} + k2.Init("schema", "digest", "prev", "plan", "rga", "lice") + require.NotEqual(t, k1.Hash(), k2.Hash(), "user segment must have an unambiguous boundary") + + // user="" leaves the hash equal to the legacy 5-field layout. + kOff := &StmtDigestKey{} + kOff.Init("schema", "digest", "prev", "plan", "rg", "") + legacy := append([]byte{}, hack.Slice("digest")...) + legacy = append(legacy, hack.Slice("schema")...) + legacy = append(legacy, hack.Slice("prev")...) + legacy = append(legacy, hack.Slice("plan")...) + legacy = append(legacy, hack.Slice("rg")...) + require.Equal(t, legacy, kOff.Hash()) +} From 9f43ac1a7dd8fc58362d50a2403e031b5841c337 Mon Sep 17 00:00:00 2001 From: nolouch Date: Wed, 20 May 2026 17:30:33 -0700 Subject: [PATCH 4/6] util/stmtsummary: rename test var kOff to off to satisfy revive var-naming check_dev lint flagged kOff as a leading-k variable name. Rename to off; no behavioral change. Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/util/stmtsummary/statement_summary_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/util/stmtsummary/statement_summary_test.go b/pkg/util/stmtsummary/statement_summary_test.go index f8512a3301905..bdf4fac921c78 100644 --- a/pkg/util/stmtsummary/statement_summary_test.go +++ b/pkg/util/stmtsummary/statement_summary_test.go @@ -1756,12 +1756,12 @@ func TestStmtDigestKeyBoundary(t *testing.T) { require.NotEqual(t, k1.Hash(), k2.Hash(), "user segment must have an unambiguous boundary") // user="" leaves the hash equal to the legacy 5-field layout. - kOff := &StmtDigestKey{} - kOff.Init("schema", "digest", "prev", "plan", "rg", "") + off := &StmtDigestKey{} + off.Init("schema", "digest", "prev", "plan", "rg", "") legacy := append([]byte{}, hack.Slice("digest")...) legacy = append(legacy, hack.Slice("schema")...) legacy = append(legacy, hack.Slice("prev")...) legacy = append(legacy, hack.Slice("plan")...) legacy = append(legacy, hack.Slice("rg")...) - require.Equal(t, legacy, kOff.Hash()) + require.Equal(t, legacy, off.Hash()) } From 985f8e45c85fcb6e55d35b4831e18cb12385bae4 Mon Sep 17 00:00:00 2001 From: nolouch Date: Tue, 26 May 2026 14:12:46 -0700 Subject: [PATCH 5/6] util/stmtsummary: clarify user key segment comment --- pkg/util/stmtsummary/statement_summary.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/util/stmtsummary/statement_summary.go b/pkg/util/stmtsummary/statement_summary.go index 0b08c6e4ad61e..f1f918234f33b 100644 --- a/pkg/util/stmtsummary/statement_summary.go +++ b/pkg/util/stmtsummary/statement_summary.go @@ -55,8 +55,8 @@ type StmtDigestKey struct { // Init initialize the hash key. // When user is empty (group_by_user disabled), the hash is byte-identical to -// the pre-user-dimension layout. When user is non-empty, it is prefixed with -// a 4-byte length so the boundary between resourceGroupName and user is +// the pre-user-dimension layout. When user is non-empty, the hash appends a +// length-prefixed user segment after resourceGroupName so the boundary is // unambiguous and pairs like ("rg", "alice") and ("rga", "lice") cannot // collide. func (key *StmtDigestKey) Init(schemaName, digest, prevDigest, planDigest, resourceGroupName, user string) { From 1168048897221cc144a9752f4ab27a5e6d87e6d6 Mon Sep 17 00:00:00 2001 From: nolouch Date: Thu, 28 May 2026 01:31:59 -0700 Subject: [PATCH 6/6] util/stmtsummary: extract clear helper for stmt summary reset --- pkg/util/stmtsummary/statement_summary.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pkg/util/stmtsummary/statement_summary.go b/pkg/util/stmtsummary/statement_summary.go index f1f918234f33b..6196c1f4cca18 100644 --- a/pkg/util/stmtsummary/statement_summary.go +++ b/pkg/util/stmtsummary/statement_summary.go @@ -435,6 +435,11 @@ func (ssMap *stmtSummaryByDigestMap) Clear() { ssMap.Lock() defer ssMap.Unlock() + ssMap.clearLocked() +} + +// clearLocked removes all statement summaries. ssMap.Lock must be held. +func (ssMap *stmtSummaryByDigestMap) clearLocked() { ssMap.summaryMap.DeleteAll() ssMap.other.Clear() ssMap.beginTimeForCurInterval = 0 @@ -555,11 +560,7 @@ func (ssMap *stmtSummaryByDigestMap) SetGroupByUser(value bool) error { return nil } ssMap.optGroupByUser.Store(value) - ssMap.summaryMap.DeleteAll() - ssMap.other.Clear() - ssMap.beginTimeForCurInterval = 0 - ssMap.currentWindowEvictedCount = 0 - ssMap.updateMetricsLocked() + ssMap.clearLocked() return nil }