From a46662a2e68a5c84845a86df09c00a230383faea Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Sun, 24 Dec 2023 01:49:49 +0800 Subject: [PATCH] [Fix](statistics) Fix partition name NPE and sample for all table during auto analyze (#28916) Fix the partition name NPE and sample all tables during auto analyze. All tables are sampled because the value returned by getDataSize may be delayed, which could trigger a full analyze of a huge table and use too many resources. Sampling every table avoids this. Will improve the strategy later. --- .../src/main/java/org/apache/doris/qe/SessionVariable.java | 2 +- .../java/org/apache/doris/statistics/StatisticConstants.java | 2 +- .../org/apache/doris/statistics/StatisticsAutoCollector.java | 2 +- .../java/org/apache/doris/statistics/HMSAnalysisTaskTest.java | 2 +- .../java/org/apache/doris/statistics/OlapAnalysisTaskTest.java | 2 +- .../apache/doris/statistics/StatisticsAutoCollectorTest.java | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 7a17fce9cad1ae..b64b986d78bf9a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -1259,7 +1259,7 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) { + "When enable_auto_sample is enabled, tables" + "larger than this value will automatically collect " + "statistics through sampling"}) - public long hugeTableLowerBoundSizeInBytes = 5L * 1024 * 1024 * 1024; + public long hugeTableLowerBoundSizeInBytes = 0; @VariableMgr.VarAttr(name = HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS, flag = VariableMgr.GLOBAL, description = {"控制对大表的自动ANALYZE的最小时间间隔," diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index 7dae2761924e59..350b6b4fba5787 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -86,7 +86,7 @@ public class StatisticConstants { public static final int INSERT_MERGE_ITEM_COUNT = 200; public static final long HUGE_TABLE_DEFAULT_SAMPLE_ROWS = 4194304; - public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 5L * 1024 * 1024 * 1024; + public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 0; public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = TimeUnit.HOURS.toMillis(0); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index a15b1eec417b85..2c78a5f7f32d71 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -148,7 +148,7 @@ protected boolean skip(TableIf table) { protected void createAnalyzeJobForTbl(DatabaseIf db, List analysisInfos, TableIf table) { - AnalysisMethod analysisMethod = table.getDataSize(true) > StatisticsUtil.getHugeTableLowerBoundSizeInBytes() + AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes() ? 
AnalysisMethod.SAMPLE : AnalysisMethod.FULL; AnalysisInfo jobInfo = new AnalysisInfoBuilder() .setJobId(Env.getCurrentEnv().getNextId()) diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java index a569a5cb06d9dc..12a1a9c046b4d0 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java @@ -86,7 +86,7 @@ public void testAutoSampleSmallTable(@Mocked HMSExternalTable tableIf) new MockUp() { @Mock public long getDataSize(boolean singleReplica) { - return 1000; + return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1; } }; HMSAnalysisTask task = new HMSAnalysisTask(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java index 5b27c79c8633ed..e0b5a4b047892e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java @@ -101,7 +101,7 @@ public void testSample3(@Mocked OlapTable tbl) { @Mock public long getDataSize(boolean singleReplica) { - return 1000; + return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1; } }; diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index 0c0061ff13bc83..cc77557c8cedb0 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -349,7 +349,7 @@ public List getBaseSchema() { @Mock public long getDataSize(boolean singleReplica) { - return 1000; + return 
StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1; } @Mock