Files
impala/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
xqhe d47d305bf4 IMPALA-11418: A statement that returns at most one row need not to spool results
A query that returns at most one row can run more efficiently without
result spooling. If result spooling is enabled, it will set the
minimum memory reservation in PlanRootSink, e.g. 'select 1' minimum
memory reservation is 4MB.

This optimization can reduce the statement's resource reservation and
prevent the exception 'Failed to get minimum memory reservation' when
the host memory limit not available.

Testing:
- Add tests in result-spooling.test

Change-Id: Icd4d73c21106048df68a270cf03d4abd56bd3aac
Reviewed-on: http://gerrit.cloudera.org:8080/18711
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2022-09-28 05:46:03 +00:00

96 lines
5.0 KiB
Plaintext

====
---- QUERY
# Explain a simple hash join query.
explain
select *
from tpch.lineitem join tpch.orders on l_orderkey = o_orderkey;
---- RESULTS: VERIFY_IS_EQUAL
row_regex:.*Max Per-Host Resource Reservation: Memory=[0-9.]*MB Threads=[0-9]*.*
row_regex:.*Per-Host Resource Estimates: Memory=[0-9.]*MB.*
'Analyzed query: SELECT * FROM tpch.lineitem INNER JOIN tpch.orders ON l_orderkey'
'= o_orderkey'
''
'F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1
'PLAN-ROOT SINK'
'| output exprs: tpch.lineitem.l_orderkey, tpch.lineitem.l_partkey, tpch.lineitem.l_suppkey, tpch.lineitem.l_linenumber, tpch.lineitem.l_quantity, tpch.lineitem.l_extendedprice, tpch.lineitem.l_discount, tpch.lineitem.l_tax, tpch.lineitem.l_returnflag, tpch.lineitem.l_linestatus, tpch.lineitem.l_shipdate, tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate, tpch.lineitem.l_shipinstruct, tpch.lineitem.l_shipmode, tpch.lineitem.l_comment, tpch.orders.o_orderkey, tpch.orders.o_custkey, tpch.orders.o_orderstatus, tpch.orders.o_totalprice, tpch.orders.o_orderdate, tpch.orders.o_orderpriority, tpch.orders.o_clerk, tpch.orders.o_shippriority, tpch.orders.o_comment'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B spill-buffer=[0-9.]*MB thread-reservation=0
'|'
'04:EXCHANGE [UNPARTITIONED]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
'| tuple-ids=0,1 row-size=402B cardinality=5.76M'
'| in pipelines: 00(GETNEXT)'
'|'
'F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3'
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
'02:HASH JOIN [INNER JOIN, BROADCAST]'
'| hash predicates: l_orderkey = o_orderkey'
'| fk/pk conjuncts: l_orderkey = o_orderkey'
'| runtime filters: RF000[bloom] <- o_orderkey'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B spill-buffer=[0-9.]*MB thread-reservation=0.*
'| tuple-ids=0,1 row-size=402B cardinality=5.76M'
'| in pipelines: 00(GETNEXT), 01(OPEN)'
'|'
'|--03:EXCHANGE [BROADCAST]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
'| | tuple-ids=1 row-size=171B cardinality=1.50M'
'| | in pipelines: 01(GETNEXT)'
'| |'
'| F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2'
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
'| 01:SCAN HDFS [tpch.orders, RANDOM]'
row_regex:.*partitions=1/1 files=1 size=.*
'| stored statistics:'
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
'| columns: all'
row_regex:.*extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1.*
'| tuple-ids=1 row-size=171B cardinality=1.50M'
'| in pipelines: 01(GETNEXT)'
'|'
'00:SCAN HDFS [tpch.lineitem, RANDOM]'
row_regex:.*partitions=1/1 files=1 size=.*
' runtime filters: RF000[bloom] -> l_orderkey'
' stored statistics:'
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
' columns: all'
row_regex:.*extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1.*
' tuple-ids=0 row-size=231B cardinality=6.00M'
' in pipelines: 00(GETNEXT)'
====
---- QUERY
# Tests the warning about missing table stats in the explain header.
explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col)
from functional_avro.alltypes t1
inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id)
left outer join functional_avro.alltypes t3 on (t2.id = t3.id)
where t1.month = 1 and t2.year = 2009 and t3.bool_col = false
---- RESULTS: VERIFY_IS_SUBSET
'Per-Host Resource Estimates: Memory=54MB'
'WARNING: The following tables are missing relevant table and/or column statistics.'
'functional_avro.alltypes, functional_parquet.alltypessmall'
====
---- QUERY
# Tests the warning about missing table stats in the explain header.
# Disable the estimation of cardinality for an hdfs table withot stats.
set DISABLE_HDFS_NUM_ROWS_ESTIMATE=1;
explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col)
from functional_avro.alltypes t1
inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id)
left outer join functional_avro.alltypes t3 on (t2.id = t3.id)
where t1.month = 1 and t2.year = 2009 and t3.bool_col = false
---- RESULTS: VERIFY_IS_SUBSET
'Per-Host Resource Estimates: Memory=4.05GB'
'WARNING: The following tables are missing relevant table and/or column statistics.'
'functional_avro.alltypes, functional_parquet.alltypessmall'
====
---- QUERY
# Tests the set operation statement rewrite
explain select year, month from functional.alltypes
intersect
select year, month from functional.alltypes where year=2009;
---- RESULTS: VERIFY_IS_SUBSET
'Per-Host Resources: mem-estimate=13.97MB mem-reservation=5.88MB thread-reservation=1 runtime-filters-memory=2.00MB'
'04:HASH JOIN [LEFT SEMI JOIN, PARTITIONED]'
====