From 7fcd7cd64efbd3c70d5b46bf82d5652b2dbc70d2 Mon Sep 17 00:00:00 2001 From: Alex Behm Date: Fri, 7 Feb 2014 19:55:08 -0800 Subject: [PATCH] Add list of tables missing stats to explain header and mem-limit exceeded error. Change-Id: Ibe8f329d5513ae84a8134b9ddb3645fa174d8a66 Reviewed-on: http://gerrit.ent.cloudera.com:8080/1501 Reviewed-by: Alex Behm Tested-by: jenkins Reviewed-on: http://gerrit.ent.cloudera.com:8080/1880 --- be/src/runtime/runtime-state.cc | 7 ++ be/src/service/query-exec-state.cc | 27 +++++++ be/src/statestore/simple-scheduler.cc | 8 +- be/src/util/error-util.cc | 13 ++++ be/src/util/error-util.h | 6 ++ common/thrift/Frontend.thrift | 7 -- common/thrift/ImpalaInternalService.thrift | 4 + .../com/cloudera/impala/planner/Planner.java | 18 ++++- .../com/cloudera/impala/service/Frontend.java | 18 ++--- .../cloudera/impala/planner/PlannerTest.java | 21 ++++-- .../queries/QueryTest/explain-level0.test | 23 ++++++ .../queries/QueryTest/explain-level1.test | 41 ++++++++++ .../queries/QueryTest/explain-level2.test | 67 +++++++++++++++++ .../queries/QueryTest/explain-level3.test | 74 +++++++++++++++++++ .../QueryTest/partition-col-types.test | 18 +++++ 15 files changed, 323 insertions(+), 29 deletions(-) diff --git a/be/src/runtime/runtime-state.cc b/be/src/runtime/runtime-state.cc index a08526ae1..60bd32ff6 100644 --- a/be/src/runtime/runtime-state.cc +++ b/be/src/runtime/runtime-state.cc @@ -29,6 +29,7 @@ #include "util/cpu-info.h" #include "util/debug-util.h" #include "util/disk-info.h" +#include "util/error-util.h" #include "util/jni-util.h" #include "util/mem-info.h" @@ -235,12 +236,18 @@ Status RuntimeState::SetMemLimitExceeded(MemTracker* tracker, << " without exceeding limit." << endl; } + if (exec_env_->process_mem_tracker()->LimitExceeded()) { ss << exec_env_->process_mem_tracker()->LogUsage(); } else { ss << query_mem_tracker_->LogUsage(); } LogError(ss.str()); + // Add warning about missing stats. 
+ if (query_ctxt_.__isset.tables_missing_stats + && !query_ctxt_.tables_missing_stats.empty()) { + LogError(GetTablesMissingStatsWarning(query_ctxt_.tables_missing_stats)); + } DCHECK(query_status_.IsMemLimitExceeded()); return query_status_; } diff --git a/be/src/service/query-exec-state.cc b/be/src/service/query-exec-state.cc index 1b4ed2d06..e2ed5a279 100644 --- a/be/src/service/query-exec-state.cc +++ b/be/src/service/query-exec-state.cc @@ -42,6 +42,12 @@ DECLARE_int64(max_result_cache_size); namespace impala { +// Keys into the info string map of the runtime profile referring to specific +// items used by CM for monitoring purposes. +static const string PER_HOST_MEM_KEY = "Estimated Per-Host Mem"; +static const string PER_HOST_VCORES_KEY = "Estimated Per-Host VCores"; +static const string TABLES_MISSING_STATS_KEY = "Tables Missing Stats"; + ImpalaServer::QueryExecState::QueryExecState( const TQueryContext& query_ctxt, ExecEnv* exec_env, Frontend* frontend, ImpalaServer* server, shared_ptr session) @@ -247,6 +253,27 @@ Status ImpalaServer::QueryExecState::ExecQueryOrDmlRequest( << "----------------"; summary_profile_.AddInfoString("Plan", plan_ss.str()); } + // Add info strings consumed by CM: Estimated mem/vcores and tables missing stats. + if (query_exec_request.__isset.per_host_mem_req) { + stringstream ss; + ss << query_exec_request.per_host_mem_req; + summary_profile_.AddInfoString(PER_HOST_MEM_KEY, ss.str()); + } + if (query_exec_request.__isset.per_host_vcores) { + stringstream ss; + ss << query_exec_request.per_host_vcores; + summary_profile_.AddInfoString(PER_HOST_VCORES_KEY, ss.str()); + } + if (query_exec_request.query_ctxt.__isset.tables_missing_stats && + !query_exec_request.query_ctxt.tables_missing_stats.empty()) { + stringstream ss; + const vector<TTableName>& tbls = query_exec_request.query_ctxt.tables_missing_stats; + for (int i = 0; i < tbls.size(); ++i) { + if (i != 0) ss << ","; + ss << tbls[i].db_name << "." 
<< tbls[i].table_name; + } + summary_profile_.AddInfoString(TABLES_MISSING_STATS_KEY, ss.str()); + } // If desc_tbl is not set, query has SELECT with no FROM. In that // case, the query can only have a single fragment, and that fragment needs to be diff --git a/be/src/statestore/simple-scheduler.cc b/be/src/statestore/simple-scheduler.cc index 4556bbce6..cb17227b7 100644 --- a/be/src/statestore/simple-scheduler.cc +++ b/be/src/statestore/simple-scheduler.cc @@ -36,6 +36,7 @@ #include "util/uid-util.h" #include "util/container-util.h" #include "util/debug-util.h" +#include "util/error-util.h" #include "util/llama-util.h" #include "gen-cpp/ResourceBrokerService_types.h" @@ -758,9 +759,10 @@ Status SimpleScheduler::Schedule(Coordinator* coord, QuerySchedule* schedule) { reservation_request, schedule->reservation()); if (!status.ok()) { // Warn about missing table and/or column stats if necessary. - if (schedule->request().__isset.fe_error_msgs && - !schedule->request().fe_error_msgs.empty()) { - status.AddErrorMsg(schedule->request().fe_error_msgs[0]); + if(schedule->request().query_ctxt.__isset.tables_missing_stats && + !schedule->request().query_ctxt.tables_missing_stats.empty()) { + status.AddErrorMsg(GetTablesMissingStatsWarning( + schedule->request().query_ctxt.tables_missing_stats)); } return status; } diff --git a/be/src/util/error-util.cc b/be/src/util/error-util.cc index 1c9971dc9..ed28426f7 100644 --- a/be/src/util/error-util.cc +++ b/be/src/util/error-util.cc @@ -32,4 +32,17 @@ string GetStrErrMsg() { return ss.str(); } +string GetTablesMissingStatsWarning(const vector<TTableName>& tables_missing_stats) { + stringstream ss; + if (tables_missing_stats.empty()) return string(""); + ss << "WARNING: The following tables are missing relevant table and/or column " + << "statistics.\n"; + for (int i = 0; i < tables_missing_stats.size(); ++i) { + const TTableName& table_name = tables_missing_stats[i]; + if (i != 0) ss << ","; + ss << table_name.db_name << "." 
<< table_name.table_name; + } + return ss.str(); +} + } diff --git a/be/src/util/error-util.h b/be/src/util/error-util.h index 50bcb404f..296429200 100644 --- a/be/src/util/error-util.h +++ b/be/src/util/error-util.h @@ -17,7 +17,9 @@ #define IMPALA_UTIL_ERROR_UTIL_H #include +#include <vector> #include +#include "gen-cpp/CatalogObjects_types.h" namespace impala { @@ -26,6 +28,10 @@ namespace impala { // Returns empty string if errno is 0. std::string GetStrErrMsg(); +// Returns an error message warning that the given table names are missing relevant +// table and/or column statistics. +std::string GetTablesMissingStatsWarning( + const std::vector<TTableName>& tables_missing_stats); } #endif diff --git a/common/thrift/Frontend.thrift b/common/thrift/Frontend.thrift index 5f340e402..37b36b43d 100644 --- a/common/thrift/Frontend.thrift +++ b/common/thrift/Frontend.thrift @@ -284,13 +284,6 @@ struct TQueryExecRequest { // Estimated per-host CPU requirements in YARN virtual cores. // Used for resource management. 11: optional i16 per_host_vcores - - // List of error/warning messages the FE uses to pass to the BE, so that they can be - // appended to BE error messages. The list is indexed (by convention) according to the - // specific error in the BE that the message should be appended to. Currently, only - // index 0 is used for a warning message about missing table and/or column stats - // relevant to this query. - 12: optional list<string> fe_error_msgs } enum TCatalogOpType { diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift index 7a15c2424..97150ac4e 100644 --- a/common/thrift/ImpalaInternalService.thrift +++ b/common/thrift/ImpalaInternalService.thrift @@ -133,6 +133,10 @@ struct TQueryContext { // Process ID of the impalad to which the user is connected. 4: required i32 pid + + // List of tables missing relevant table and/or column stats. Used for + // populating query-profile fields consumed by CM as well as warning messages. 
+ 5: optional list tables_missing_stats } // A scan range plus the parameters needed to execute that scan. diff --git a/fe/src/main/java/com/cloudera/impala/planner/Planner.java b/fe/src/main/java/com/cloudera/impala/planner/Planner.java index e5d1bbab8..88e4791a4 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/Planner.java +++ b/fe/src/main/java/com/cloudera/impala/planner/Planner.java @@ -62,6 +62,8 @@ import com.cloudera.impala.thrift.TExplainLevel; import com.cloudera.impala.thrift.TPartitionType; import com.cloudera.impala.thrift.TQueryExecRequest; import com.cloudera.impala.thrift.TQueryOptions; +import com.cloudera.impala.thrift.TTableName; +import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -175,12 +177,26 @@ public class Planner { public String getExplainString(ArrayList fragments, TQueryExecRequest request, TExplainLevel explainLevel) { StringBuilder str = new StringBuilder(); + boolean hasHeader = false; if (request.isSetPer_host_mem_req() && request.isSetPer_host_vcores()) { str.append( - String.format("Estimated Per-Host Requirements: Memory=%s VCores=%s\n\n", + String.format("Estimated Per-Host Requirements: Memory=%s VCores=%s\n", PrintUtils.printBytes(request.getPer_host_mem_req()), request.per_host_vcores)); + hasHeader = true; } + // Append warning about tables missing stats. + if (request.query_ctxt.isSetTables_missing_stats() && + !request.query_ctxt.getTables_missing_stats().isEmpty()) { + List tableNames = Lists.newArrayList(); + for (TTableName tableName: request.query_ctxt.getTables_missing_stats()) { + tableNames.add(tableName.db_name + "." 
+ tableName.table_name); + } + str.append("WARNING: The following tables are missing relevant table " + + "and/or column statistics.\n" + Joiner.on(", ").join(tableNames) + "\n"); + hasHeader = true; + } + if (hasHeader) str.append("\n"); if (explainLevel.ordinal() < TExplainLevel.VERBOSE.ordinal()) { // Print the non-fragmented parallel plan. diff --git a/fe/src/main/java/com/cloudera/impala/service/Frontend.java b/fe/src/main/java/com/cloudera/impala/service/Frontend.java index ed1a74989..b75fc76c8 100644 --- a/fe/src/main/java/com/cloudera/impala/service/Frontend.java +++ b/fe/src/main/java/com/cloudera/impala/service/Frontend.java @@ -90,6 +90,7 @@ import com.cloudera.impala.thrift.TResultSetMetadata; import com.cloudera.impala.thrift.TStatus; import com.cloudera.impala.thrift.TStatusCode; import com.cloudera.impala.thrift.TStmtType; +import com.cloudera.impala.thrift.TTableName; import com.cloudera.impala.thrift.TUpdateCatalogCacheRequest; import com.cloudera.impala.thrift.TUpdateCatalogCacheResponse; import com.cloudera.impala.util.TResultRowBuilder; @@ -98,6 +99,7 @@ import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; /** * Frontend API for the impalad process. @@ -613,22 +615,19 @@ public class Frontend { // Set scan ranges/locations for scan nodes. // Also assemble list of tables names missing stats for assembling a warning message. 
- List tablesMissingStats = Lists.newArrayList(); LOG.debug("get scan range locations"); + Set tablesMissingStats = Sets.newTreeSet(); for (ScanNode scanNode: scanNodes) { queryExecRequest.putToPer_node_scan_ranges( scanNode.getId().asInt(), scanNode.getScanRangeLocations( queryCtxt.request.query_options.getMax_scan_range_length())); if (scanNode.isTableMissingStats()) { - tablesMissingStats.add(scanNode.getTupleDesc().getTable().getFullName()); + tablesMissingStats.add(scanNode.getTupleDesc().getTableName().toThrift()); } } - if (!tablesMissingStats.isEmpty()) { - String warnMsg = "Warning: The following tables are missing relevant table " + - "and/or column statistics leading to inaccurate resource estimates:\n" + - Joiner.on(", ").join(tablesMissingStats); - queryExecRequest.addToFe_error_msgs(warnMsg); + for (TTableName tableName: tablesMissingStats) { + queryCtxt.addToTables_missing_stats(tableName); } // Compute resource requirements after scan range locations because the cost @@ -644,6 +643,8 @@ public class Frontend { // Use the STANDARD by default for explain statements. explainLevel = TExplainLevel.STANDARD; } + // Global query parameters to be set in each TPlanExecRequest. + queryExecRequest.setQuery_ctxt(queryCtxt); explainString.append(planner.getExplainString(fragments, queryExecRequest, explainLevel)); @@ -658,9 +659,6 @@ public class Frontend { result.setQuery_exec_request(queryExecRequest); - // Global query parameters to be set in each TPlanExecRequest. 
- queryExecRequest.setQuery_ctxt(queryCtxt); - if (analysisResult.isQueryStmt()) { // fill in the metadata LOG.debug("create result set metadata"); diff --git a/fe/src/test/java/com/cloudera/impala/planner/PlannerTest.java b/fe/src/test/java/com/cloudera/impala/planner/PlannerTest.java index a1f6a306b..694e5cea6 100644 --- a/fe/src/test/java/com/cloudera/impala/planner/PlannerTest.java +++ b/fe/src/test/java/com/cloudera/impala/planner/PlannerTest.java @@ -187,7 +187,7 @@ public class PlannerTest { actualOutput.append(Section.PLAN.getHeader() + "\n"); try { execRequest = frontend_.createExecRequest(queryCtxt, explainBuilder); - String explainStr = removeResourceEstimates(explainBuilder.toString()); + String explainStr = removeExplainHeader(explainBuilder.toString()); actualOutput.append(explainStr); if (!isImplemented) { errorLog.append( @@ -266,7 +266,7 @@ public class PlannerTest { try { // distributed plan execRequest = frontend_.createExecRequest(queryCtxt, explainBuilder); - String explainStr = removeResourceEstimates(explainBuilder.toString()); + String explainStr = removeExplainHeader(explainBuilder.toString()); actualOutput.append(explainStr); if (!isImplemented) { errorLog.append( @@ -302,13 +302,18 @@ public class PlannerTest { } /** - * Strips out the header containing resource estimates from the given explain plan, - * because the estimates can change easily with stats/cardinality. + * Strips out the header containing resource estimates and the warning about missing + * stats from the given explain plan, because the estimates can change easily with + * stats/cardinality. 
*/ - private String removeResourceEstimates(String explain) { - if (explain.startsWith("Estimated Per-Host Requirements:")) { - String[] lines = explain.split("\n"); - return Joiner.on("\n").join(Arrays.copyOfRange(lines, 2, lines.length)) + "\n"; + private String removeExplainHeader(String explain) { + String[] lines = explain.split("\n"); + // Find the first empty line - the end of the header. + for (int i = 0; i < lines.length - 1; ++i) { + if (lines[i].isEmpty()) { + return Joiner.on("\n").join(Arrays.copyOfRange(lines, i + 1 , lines.length)) + + "\n"; + } } return explain; } diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test index 5e6c5baf1..c5d367d25 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test +++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test @@ -109,4 +109,27 @@ where month = 2 '' 'WRITE TO HDFS [functional.t, OVERWRITE=false]' '00:SCAN HDFS [functional.alltypes]' +==== +---- QUERY +# Tests the warning about missing table stats in the explain header. +explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col) +from functional_avro.alltypes t1 + inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id) + left outer join functional_avro.alltypes t3 on (t2.id = t3.id) +where t1.month = 1 and t2.year = 2009 and t3.bool_col = false +---- RESULTS +'Estimated Per-Host Requirements: Memory=4.03GB VCores=3' +'WARNING: The following tables are missing relevant table and/or column statistics.' 
+'functional_avro.alltypes, functional_parquet.alltypessmall' +'' +'09:AGGREGATE [MERGE FINALIZE]' +'08:EXCHANGE [PARTITION=UNPARTITIONED]' +'05:AGGREGATE' +'04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]' +'|--07:EXCHANGE [BROADCAST]' +'| 02:SCAN HDFS [functional_avro.alltypes t3]' +'03:HASH JOIN [INNER JOIN, BROADCAST]' +'|--06:EXCHANGE [BROADCAST]' +'| 01:SCAN HDFS [functional_parquet.alltypessmall t2]' +'00:SCAN HDFS [functional_avro.alltypes t1]' ==== \ No newline at end of file diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test index bd4fef778..7495926ad 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test +++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test @@ -174,4 +174,45 @@ where month = 2 '|' '00:SCAN HDFS [functional.alltypes]' ' partitions=2/24 size=36.51KB' +==== +---- QUERY +# Tests the warning about missing table stats in the explain header. +explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col) +from functional_avro.alltypes t1 + inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id) + left outer join functional_avro.alltypes t3 on (t2.id = t3.id) +where t1.month = 1 and t2.year = 2009 and t3.bool_col = false +---- RESULTS +'Estimated Per-Host Requirements: Memory=4.03GB VCores=3' +'WARNING: The following tables are missing relevant table and/or column statistics.' 
+'functional_avro.alltypes, functional_parquet.alltypessmall' +'' +'09:AGGREGATE [MERGE FINALIZE]' +'| output: sum(count(t1.int_col)), sum(sum(t2.float_col)), sum(count(t2.float_col)), sum(sum(t3.bigint_col))' +'|' +'08:EXCHANGE [PARTITION=UNPARTITIONED]' +'|' +'05:AGGREGATE' +'| output: count(t1.int_col), sum(t2.float_col), count(t2.float_col), sum(t3.bigint_col)' +'|' +'04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]' +'| hash predicates: t2.id = t3.id' +'| other predicates: t3.bool_col = FALSE' +'|' +'|--07:EXCHANGE [BROADCAST]' +'| |' +'| 02:SCAN HDFS [functional_avro.alltypes t3]' +'| partitions=24/24 size=470.35KB' +'| predicates: t3.bool_col = FALSE' +'|' +'03:HASH JOIN [INNER JOIN, BROADCAST]' +'| hash predicates: t1.id = t2.id' +'|' +'|--06:EXCHANGE [BROADCAST]' +'| |' +'| 01:SCAN HDFS [functional_parquet.alltypessmall t2]' +'| partitions=4/4 size=9.63KB' +'|' +'00:SCAN HDFS [functional_avro.alltypes t1]' +' partitions=2/24 size=39.87KB' ==== \ No newline at end of file diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test index 9a5b3d220..0b6bbc394 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test +++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test @@ -280,3 +280,70 @@ where month = 2 ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=1 row-size=97B cardinality=560' ==== +---- QUERY +# Tests the warning about missing table stats in the explain header. 
+explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col) +from functional_avro.alltypes t1 + inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id) + left outer join functional_avro.alltypes t3 on (t2.id = t3.id) +where t1.month = 1 and t2.year = 2009 and t3.bool_col = false +---- RESULTS +'Estimated Per-Host Requirements: Memory=4.03GB VCores=3' +'WARNING: The following tables are missing relevant table and/or column statistics.' +'functional_avro.alltypes, functional_parquet.alltypessmall' +'' +'09:AGGREGATE [MERGE FINALIZE]' +'| output: sum(count(t1.int_col)), sum(sum(t2.float_col)), sum(count(t2.float_col)), sum(sum(t3.bigint_col))' +'| hosts=3 per-host-mem=unavailable' +'| tuple-ids=3 row-size=32B cardinality=1' +'|' +'08:EXCHANGE [PARTITION=UNPARTITIONED]' +'| hosts=3 per-host-mem=unavailable' +'| tuple-ids=3 row-size=32B cardinality=1' +'|' +'05:AGGREGATE' +'| output: count(t1.int_col), sum(t2.float_col), count(t2.float_col), sum(t3.bigint_col)' +'| hosts=3 per-host-mem=10.00MB' +'| tuple-ids=3 row-size=32B cardinality=1' +'|' +'04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]' +'| hash predicates: t2.id = t3.id' +'| other predicates: t3.bool_col = FALSE' +'| hosts=3 per-host-mem=2.00GB' +'| tuple-ids=0,1,2N row-size=37B cardinality=unavailable' +'|' +'|--07:EXCHANGE [BROADCAST]' +'| | hosts=3 per-host-mem=0B' +'| | tuple-ids=2 row-size=13B cardinality=unavailable' +'| |' +'| 02:SCAN HDFS [functional_avro.alltypes t3, PARTITION=RANDOM]' +'| partitions=24/24 size=470.35KB' +'| predicates: t3.bool_col = FALSE' +'| table stats: unavailable' +'| column stats: unavailable' +'| hosts=3 per-host-mem=16.00MB' +'| tuple-ids=2 row-size=13B cardinality=unavailable' +'|' +'03:HASH JOIN [INNER JOIN, BROADCAST]' +'| hash predicates: t1.id = t2.id' +'| hosts=3 per-host-mem=2.00GB' +'| tuple-ids=0,1 row-size=24B cardinality=unavailable' +'|' +'|--06:EXCHANGE [BROADCAST]' +'| | hosts=3 per-host-mem=0B' +'| | tuple-ids=1 row-size=12B 
cardinality=unavailable' +'| |' +'| 01:SCAN HDFS [functional_parquet.alltypessmall t2, PARTITION=RANDOM]' +'| partitions=4/4 size=9.63KB' +'| table stats: unavailable' +'| columns missing stats: id, float_col' +'| hosts=3 per-host-mem=16.00MB' +'| tuple-ids=1 row-size=12B cardinality=unavailable' +'|' +'00:SCAN HDFS [functional_avro.alltypes t1, PARTITION=RANDOM]' +' partitions=2/24 size=39.87KB' +' table stats: unavailable' +' columns missing stats: id, int_col' +' hosts=3 per-host-mem=16.00MB' +' tuple-ids=0 row-size=12B cardinality=unavailable' +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test index df5a0edb0..2c555fb5f 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test +++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test @@ -312,3 +312,77 @@ where month = 2 ' hosts=3 per-host-mem=16.00MB' ' tuple-ids=1 row-size=97B cardinality=560' ==== +---- QUERY +# Tests the warning about missing table stats in the explain header. +explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col) +from functional_avro.alltypes t1 + inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id) + left outer join functional_avro.alltypes t3 on (t2.id = t3.id) +where t1.month = 1 and t2.year = 2009 and t3.bool_col = false +---- RESULTS +'Estimated Per-Host Requirements: Memory=4.03GB VCores=3' +'WARNING: The following tables are missing relevant table and/or column statistics.' 
+'functional_avro.alltypes, functional_parquet.alltypessmall' +'' +'F03:PLAN FRAGMENT [PARTITION=UNPARTITIONED]' +' 09:AGGREGATE [MERGE FINALIZE]' +' | output: sum(count(t1.int_col)), sum(sum(t2.float_col)), sum(count(t2.float_col)), sum(sum(t3.bigint_col))' +' | hosts=3 per-host-mem=unavailable' +' | tuple-ids=3 row-size=32B cardinality=1' +' |' +' 08:EXCHANGE [PARTITION=UNPARTITIONED]' +' hosts=3 per-host-mem=unavailable' +' tuple-ids=3 row-size=32B cardinality=1' +'' +'F00:PLAN FRAGMENT [PARTITION=RANDOM]' +' DATASTREAM SINK [FRAGMENT=F03, EXCHANGE=08, PARTITION=UNPARTITIONED]' +' 05:AGGREGATE' +' | output: count(t1.int_col), sum(t2.float_col), count(t2.float_col), sum(t3.bigint_col)' +' | hosts=3 per-host-mem=10.00MB' +' | tuple-ids=3 row-size=32B cardinality=1' +' |' +' 04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]' +' | hash predicates: t2.id = t3.id' +' | other predicates: t3.bool_col = FALSE' +' | hosts=3 per-host-mem=2.00GB' +' | tuple-ids=0,1,2N row-size=37B cardinality=unavailable' +' |' +' |--07:EXCHANGE [BROADCAST]' +' | hosts=3 per-host-mem=0B' +' | tuple-ids=2 row-size=13B cardinality=unavailable' +' |' +' 03:HASH JOIN [INNER JOIN, BROADCAST]' +' | hash predicates: t1.id = t2.id' +' | hosts=3 per-host-mem=2.00GB' +' | tuple-ids=0,1 row-size=24B cardinality=unavailable' +' |' +' |--06:EXCHANGE [BROADCAST]' +' | hosts=3 per-host-mem=0B' +' | tuple-ids=1 row-size=12B cardinality=unavailable' +' |' +' 00:SCAN HDFS [functional_avro.alltypes t1, PARTITION=RANDOM]' +' partitions=2/24 size=39.87KB' +' table stats: unavailable' +' columns missing stats: id, int_col' +' hosts=3 per-host-mem=16.00MB' +' tuple-ids=0 row-size=12B cardinality=unavailable' +'' +'F02:PLAN FRAGMENT [PARTITION=RANDOM]' +' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=07, BROADCAST]' +' 02:SCAN HDFS [functional_avro.alltypes t3, PARTITION=RANDOM]' +' partitions=24/24 size=470.35KB' +' predicates: t3.bool_col = FALSE' +' table stats: unavailable' +' column stats: unavailable' +' hosts=3 
per-host-mem=16.00MB' +' tuple-ids=2 row-size=13B cardinality=unavailable' +'' +'F01:PLAN FRAGMENT [PARTITION=RANDOM]' +' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=06, BROADCAST]' +' 01:SCAN HDFS [functional_parquet.alltypessmall t2, PARTITION=RANDOM]' +' partitions=4/4 size=9.63KB' +' table stats: unavailable' +' columns missing stats: id, float_col' +' hosts=3 per-host-mem=16.00MB' +' tuple-ids=1 row-size=12B cardinality=unavailable' +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/partition-col-types.test b/testdata/workloads/functional-query/queries/QueryTest/partition-col-types.test index 1dab8deed..ac84a5649 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/partition-col-types.test +++ b/testdata/workloads/functional-query/queries/QueryTest/partition-col-types.test @@ -82,6 +82,8 @@ WHERE tinyint_col < 7 AND smallint_col < 6 AND int_col < 5 AND bigint_col < 40 A string_col in ('1', '2', '3') ---- RESULTS 'Estimated Per-Host Requirements: Memory=32.00MB VCores=1' +'WARNING: The following tables are missing relevant table and/or column statistics.' +'hdfs_partitioning.all_insert_partition_col_types' '' '01:EXCHANGE [PARTITION=UNPARTITIONED]' '|' @@ -94,6 +96,8 @@ SELECT * from all_insert_partition_col_types WHERE tinyint_col < 7 ---- RESULTS 'Estimated Per-Host Requirements: Memory=48.00MB VCores=1' +'WARNING: The following tables are missing relevant table and/or column statistics.' +'hdfs_partitioning.all_insert_partition_col_types' '' '01:EXCHANGE [PARTITION=UNPARTITIONED]' '|' @@ -106,6 +110,8 @@ SELECT * from all_insert_partition_col_types WHERE smallint_col < 6 ---- RESULTS 'Estimated Per-Host Requirements: Memory=48.00MB VCores=1' +'WARNING: The following tables are missing relevant table and/or column statistics.' 
+'hdfs_partitioning.all_insert_partition_col_types' '' '01:EXCHANGE [PARTITION=UNPARTITIONED]' '|' @@ -118,6 +124,8 @@ SELECT * from all_insert_partition_col_types WHERE int_col < 5 ---- RESULTS 'Estimated Per-Host Requirements: Memory=32.00MB VCores=1' +'WARNING: The following tables are missing relevant table and/or column statistics.' +'hdfs_partitioning.all_insert_partition_col_types' '' '01:EXCHANGE [PARTITION=UNPARTITIONED]' '|' @@ -130,6 +138,8 @@ SELECT * from all_insert_partition_col_types WHERE bigint_col < 40 ---- RESULTS 'Estimated Per-Host Requirements: Memory=32.00MB VCores=1' +'WARNING: The following tables are missing relevant table and/or column statistics.' +'hdfs_partitioning.all_insert_partition_col_types' '' '01:EXCHANGE [PARTITION=UNPARTITIONED]' '|' @@ -142,6 +152,8 @@ SELECT * from all_insert_partition_col_types WHERE string_col in ('1', '2', '3') ---- RESULTS 'Estimated Per-Host Requirements: Memory=32.00MB VCores=1' +'WARNING: The following tables are missing relevant table and/or column statistics.' +'hdfs_partitioning.all_insert_partition_col_types' '' '01:EXCHANGE [PARTITION=UNPARTITIONED]' '|' @@ -154,6 +166,8 @@ SELECT * from all_insert_partition_col_types WHERE double_col = 1.1 ---- RESULTS 'Estimated Per-Host Requirements: Memory=64.00MB VCores=1' +'WARNING: The following tables are missing relevant table and/or column statistics.' +'hdfs_partitioning.all_insert_partition_col_types' '' '01:EXCHANGE [PARTITION=UNPARTITIONED]' '|' @@ -166,6 +180,8 @@ SELECT * from all_insert_partition_col_types WHERE float_col = 2 ---- RESULTS 'Estimated Per-Host Requirements: Memory=32.00MB VCores=1' +'WARNING: The following tables are missing relevant table and/or column statistics.' 
+'hdfs_partitioning.all_insert_partition_col_types' '' '01:EXCHANGE [PARTITION=UNPARTITIONED]' '|' @@ -223,6 +239,8 @@ SELECT * FROM all_partition_col_types WHERE bool_col=false ---- RESULTS 'Estimated Per-Host Requirements: Memory=32.00MB VCores=1' +'WARNING: The following tables are missing relevant table and/or column statistics.' +'hdfs_partitioning.all_partition_col_types' '' '01:EXCHANGE [PARTITION=UNPARTITIONED]' '|'