diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 8f8befd83480..e0acafcfa7c9 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -1291,6 +1291,22 @@ public enum ConfVars {
"metastore.partition.management.table.pattern", "*",
"Automatic partition management will look for tables using the specified table pattern"),
+ COLUMN_STATISTICS_MANAGEMENT_TASK_FREQUENCY("metastore.column.statistics.management.task.frequency",
+ "metastore.column.statistics.management.task.frequency",
+ 7, TimeUnit.DAYS, "Frequency at which timer task runs to do automatic statistics \n" +
+ "management for tables. Statistics management include 2 configs. \n" +
+ "One is 'metastore.column.statistics.auto.deletion', and the other is 'metastore.column.statistics.retention.period'. \n" +
+ "When 'metastore.column.statistics.auto.deletion'='true' is set, statistics management will look for tables which their\n" +
+ "column statistics are over the retention period, and then delete the column stats. \n"),
+
+ COLUMN_STATISTICS_RETENTION_PERIOD("metastore.column.statistics.retention.period",
+ "metastore.column.statistics.retention.period", 365, TimeUnit.DAYS, "The retention period " +
+ "that we want to keep the stats for each table, which means if the stats are older than this period\n" +
+ "of time, the stats will be automatically deleted. \n"),
+
+ COLUMN_STATISTICS_AUTO_DELETION("metastore.column.statistics.auto.deletion", "metastore.column.statistics.auto.deletion", false,
+ "Whether table/partition column statistics will be auto deleted after retention period"),
+
METASTORE_METADATA_TRANSFORMER_CLASS("metastore.metadata.transformer.class", "metastore.metadata.transformer.class",
"org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer",
"Fully qualified class name for the metastore metadata transformer class \n"
@@ -1526,7 +1542,8 @@ public enum ConfVars {
ACID_METRICS_TASK_CLASS + "," + ACID_METRICS_LOGGER_CLASS + "," +
"org.apache.hadoop.hive.metastore.HiveProtoEventsCleanerTask" + ","
+ "org.apache.hadoop.hive.metastore.ScheduledQueryExecutionsMaintTask" + ","
- + "org.apache.hadoop.hive.metastore.ReplicationMetricsMaintTask",
+ + "org.apache.hadoop.hive.metastore.ReplicationMetricsMaintTask" + ","
+ + "org.apache.hadoop.hive.metastore.StatisticsManagementTask",
"Comma separated list of tasks that will be started in separate threads. These will " +
"always be started, regardless of whether the metastore is running in embedded mode " +
"or in server mode. They must implement " + METASTORE_TASK_THREAD_CLASS),
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatisticsManagementTask.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatisticsManagementTask.java
new file mode 100644
index 000000000000..8db2d8241f06
--- /dev/null
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatisticsManagementTask.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore;
+
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.api.DeleteColumnStatisticsRequest;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics;
+import org.apache.hadoop.hive.metastore.model.MTableColumnStatistics;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.jdo.PersistenceManager;
+import javax.jdo.Query;
+
+/**
+ * Statistics management task responsible for periodic auto-deletion of table and partition column
+ * statistics based on a configured retention interval.
+ *
+ *
When {@code metastore.statistics.auto.deletion} is enabled, this task scans
+ * {@code TAB_COL_STATS} and {@code PART_COL_STATS} for rows whose {@code lastAnalyzed} timestamp
+ * is older than {@code metastore.statistics.retention.period}, and deletes them.
+ * Individual tables may opt out by setting the table property
+ * {@value #STATISTICS_AUTO_DELETION_EXCLUDE_TBLPROPERTY} to any non-null value.
+ */
+public class StatisticsManagementTask extends ObjectStore implements MetastoreTaskThread {
+
+ private static final Logger LOG = LoggerFactory.getLogger(StatisticsManagementTask.class);
+
+ /**
+ * Table property key that, when present on a table, excludes it from automatic statistics
+ * deletion regardless of the global retention setting.
+ */
+ public static final String STATISTICS_AUTO_DELETION_EXCLUDE_TBLPROPERTY =
+ "statistics.auto.deletion.exclude";
+
+ private static final Lock LOCK = new ReentrantLock();
+
+ @Override
+ public long runFrequency(TimeUnit unit) {
+ return MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.COLUMN_STATISTICS_MANAGEMENT_TASK_FREQUENCY, unit);
+ }
+
+ @Override
+ public void setConf(Configuration configuration) {
+ this.conf = new Configuration(configuration);
+ super.setConf(configuration);
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void run() {
+ LOG.debug("Auto statistics deletion started. Cleaning up table/partition column statistics over the retention period.");
+ long retentionMillis =
+ MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.COLUMN_STATISTICS_RETENTION_PERIOD, TimeUnit.MILLISECONDS);
+ if (retentionMillis <= 0 || !MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.COLUMN_STATISTICS_AUTO_DELETION)) {
+ LOG.info("Statistics auto deletion is set to off currently.");
+ return;
+ }
+ if (!LOCK.tryLock()) {
+ return;
+ }
+ try {
+ long now = System.currentTimeMillis();
+ long lastAnalyzedThreshold = (now - retentionMillis) / 1000;
+ PersistenceManager pm = getPersistenceManager();
+ boolean committed = false;
+ openTransaction();
+ try {
+ try (IMetaStoreClient msc = new HiveMetaStoreClient(conf)) {
+ deleteExpiredTableColStats(pm, msc, lastAnalyzedThreshold);
+ deleteExpiredPartitionColStats(pm, msc, lastAnalyzedThreshold);
+ }
+ committed = commitTransaction();
+ } finally {
+ if (!committed) {
+ rollbackTransaction();
+ }
+ }
+ } catch (Exception e) {
+ LOG.error("Error during statistics auto deletion", e);
+ } finally {
+ LOCK.unlock();
+ }
+ }
+
+ /**
+ * Deletes expired table-level column statistics from {@code TAB_COL_STATS}.
+ * Tables with the {@value #STATISTICS_AUTO_DELETION_EXCLUDE_TBLPROPERTY} property set are skipped.
+ *
+ * @param pm the JDO persistence manager to use for the query
+ * @param msc the metastore client used to issue delete requests
+ * @param lastAnalyzedThreshold epoch seconds; rows with lastAnalyzed below this value are expired
+ * @throws Exception if the JDO query or the delete request fails
+ */
+ private void deleteExpiredTableColStats(PersistenceManager pm, IMetaStoreClient msc,
+ long lastAnalyzedThreshold) throws Exception {
+
+ try (Query tblQuery = pm.newQuery(MTableColumnStatistics.class)) {
+ tblQuery.setFilter("lastAnalyzed < threshold");
+ tblQuery.declareParameters("long threshold");
+ // partitionName does not exist on MTableColumnStatistics; omitted here
+ tblQuery.setResult(
+ "table.database.name, "
+ + "table.tableName, "
+ + "table.parameters.get(\"" + STATISTICS_AUTO_DELETION_EXCLUDE_TBLPROPERTY + "\")");
+ @SuppressWarnings("unchecked")
+ List