Files
impala/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
Daniel Becker 301d7ebe75 IMPALA-10332: Add file formats to HdfsScanNode's thrift representation.
List all file formats that a HdfsScanNode needs to process in any
fragment instance. It is possible that some file formats will not be
needed in all fragment instances.

This is a step towards sharing codegen between different impala
backends. Using the file formats provided in the thrift file, a backend
can codegen code for file formats that are not needed in its own process
but are needed in other fragment instances running on other backends,
and the resulting binary can be shared between multiple backends.

Codegenning for file formats will be done based on the thrift message
and not on what is needed for the actual backend. This leads to some
extra work in case a file format is not needed for the current backend
and codegen sharing is not available (at this point it is not
implemented). However, the overall number of such cases is low.

Also adding the file formats to the node's explain string at level 3.

Testing:
 - Added tests to verify that the file formats are present in the
   explain string at level 3.

Change-Id: Iad6b8271bd248983f327c07883a3bedf50f25b5d
Reviewed-on: http://gerrit.cloudera.org:8080/16728
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Csaba Ringhofer <csringhofer@cloudera.com>
2020-11-20 17:54:08 +00:00

91 lines
4.7 KiB
Plaintext

====
---- QUERY
# Explain a simple hash join query.
# The whole plan is compared (VERIFY_IS_EQUAL); resource estimates and table/file
# sizes are matched with row_regex patterns rather than literal values.
# Note for row_regex lines: '|' is regex alternation, so the plan's '|' tree
# characters must not appear unescaped in a pattern; a leading '.*' is used to
# skip over them instead.
explain
select *
from tpch.lineitem join tpch.orders on l_orderkey = o_orderkey;
---- RESULTS: VERIFY_IS_EQUAL
row_regex:.*Max Per-Host Resource Reservation: Memory=[0-9.]*MB Threads=[0-9]*.*
row_regex:.*Per-Host Resource Estimates: Memory=[0-9.]*MB.*
'Analyzed query: SELECT * FROM tpch.lineitem INNER JOIN tpch.orders ON l_orderkey'
'= o_orderkey'
''
'F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1
' PLAN-ROOT SINK'
' | output exprs: tpch.lineitem.l_orderkey, tpch.lineitem.l_partkey, tpch.lineitem.l_suppkey, tpch.lineitem.l_linenumber, tpch.lineitem.l_quantity, tpch.lineitem.l_extendedprice, tpch.lineitem.l_discount, tpch.lineitem.l_tax, tpch.lineitem.l_returnflag, tpch.lineitem.l_linestatus, tpch.lineitem.l_shipdate, tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate, tpch.lineitem.l_shipinstruct, tpch.lineitem.l_shipmode, tpch.lineitem.l_comment, tpch.orders.o_orderkey, tpch.orders.o_custkey, tpch.orders.o_orderstatus, tpch.orders.o_totalprice, tpch.orders.o_orderdate, tpch.orders.o_orderpriority, tpch.orders.o_clerk, tpch.orders.o_shippriority, tpch.orders.o_comment'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' |'
' 04:EXCHANGE [UNPARTITIONED]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' tuple-ids=0,1 row-size=402B cardinality=5.76M'
' in pipelines: 00(GETNEXT)'
''
'F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3'
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=04, UNPARTITIONED]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' 02:HASH JOIN [INNER JOIN, BROADCAST]'
' | hash predicates: l_orderkey = o_orderkey'
' | fk/pk conjuncts: l_orderkey = o_orderkey'
' | runtime filters: RF000[bloom] <- o_orderkey'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B spill-buffer=[0-9.]*MB thread-reservation=.*
' | tuple-ids=0,1 row-size=402B cardinality=5.76M'
' | in pipelines: 00(GETNEXT), 01(OPEN)'
' |'
' |--03:EXCHANGE [BROADCAST]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' | tuple-ids=1 row-size=171B cardinality=1.50M'
' | in pipelines: 01(GETNEXT)'
' |'
' 00:SCAN HDFS [tpch.lineitem, RANDOM]'
row_regex:.*partitions=1/1 files=1 size=.*
' runtime filters: RF000[bloom] -> l_orderkey'
' stored statistics:'
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
' columns: all'
row_regex:.*extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
' file formats: [TEXT]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1.*
' tuple-ids=0 row-size=231B cardinality=6.00M'
' in pipelines: 00(GETNEXT)'
''
'F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2'
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=03, BROADCAST]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' 01:SCAN HDFS [tpch.orders, RANDOM]'
row_regex:.*partitions=1/1 files=1 size=.*
' stored statistics:'
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
' columns: all'
row_regex:.* extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
' file formats: [TEXT]'
row_regex:.* mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
' tuple-ids=1 row-size=171B cardinality=1.50M'
' in pipelines: 01(GETNEXT)'
====
---- QUERY
# Checks that the explain header warns about tables that are missing table and/or
# column statistics. The result check uses VERIFY_IS_SUBSET, so only the two
# warning lines are listed below.
explain
select
  count(t1.int_col),
  avg(t2.float_col),
  sum(t3.bigint_col)
from functional_avro.alltypes t1
  inner join functional_parquet.alltypessmall t2
    on (t1.id = t2.id)
  left outer join functional_avro.alltypes t3
    on (t2.id = t3.id)
where t1.month = 1
  and t2.year = 2009
  and t3.bool_col = false
---- RESULTS: VERIFY_IS_SUBSET
'WARNING: The following tables are missing relevant table and/or column statistics.'
'functional_avro.alltypes, functional_parquet.alltypessmall'
====
---- QUERY
# Checks that the explain string for a scan of functional.alltypesmixedformat
# lists all four of the table's file formats.
explain
select *
from functional.alltypesmixedformat;
---- RESULTS: VERIFY_IS_SUBSET
' file formats: [PARQUET, RC_FILE, SEQUENCE_FILE, TEXT]'
====
---- QUERY
# Checks that the explain string also reports the file formats when the scanned
# table is an Iceberg table.
explain
select *
from functional_parquet.iceberg_partitioned;
---- RESULTS: VERIFY_IS_SUBSET
' file formats: [PARQUET]'
====