Files
impala/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
skyyws 345ba685e7 IMPALA-11049: Added expr analyzed check in 'SimplifyCastExprRule.java'
We added a new expr rewrite rule 'SimplifyCastExprRule.java' in
IMPALA-10836. If expr is not analyzed, this rewrite rule would throw
an 'AnalysisException', this is due to 'orderByElements_' not been
analyzed. We try to substitute order by elements when creating
'SortInfo', but caused some other problems. So we only add expr
analyzed check in this rule to solve this problem. When adding other
expr rewrite rules in the future, we should also add this check.

Testing:
- Added test cases in 'explain-level3.test'

Change-Id: I2780e04a6d5a32e224cd0470cf6f166a832363ec
Reviewed-on: http://gerrit.cloudera.org:8080/18099
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2022-02-16 13:28:27 +00:00

146 lines
7.1 KiB
Plaintext

====
---- QUERY
# Explain a simple hash join query.
explain
select *
from tpch.lineitem join tpch.orders on l_orderkey = o_orderkey;
---- RESULTS: VERIFY_IS_EQUAL
row_regex:.*Max Per-Host Resource Reservation: Memory=[0-9.]*MB Threads=[0-9]*.*
row_regex:.*Per-Host Resource Estimates: Memory=[0-9.]*MB.*
'Analyzed query: SELECT * FROM tpch.lineitem INNER JOIN tpch.orders ON l_orderkey'
'= o_orderkey'
''
'F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1
' PLAN-ROOT SINK'
' | output exprs: tpch.lineitem.l_orderkey, tpch.lineitem.l_partkey, tpch.lineitem.l_suppkey, tpch.lineitem.l_linenumber, tpch.lineitem.l_quantity, tpch.lineitem.l_extendedprice, tpch.lineitem.l_discount, tpch.lineitem.l_tax, tpch.lineitem.l_returnflag, tpch.lineitem.l_linestatus, tpch.lineitem.l_shipdate, tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate, tpch.lineitem.l_shipinstruct, tpch.lineitem.l_shipmode, tpch.lineitem.l_comment, tpch.orders.o_orderkey, tpch.orders.o_custkey, tpch.orders.o_orderstatus, tpch.orders.o_totalprice, tpch.orders.o_orderdate, tpch.orders.o_orderpriority, tpch.orders.o_clerk, tpch.orders.o_shippriority, tpch.orders.o_comment'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B spill-buffer=[0-9.]*MB thread-reservation=0
' |'
' 04:EXCHANGE [UNPARTITIONED]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' tuple-ids=0,1 row-size=402B cardinality=5.76M'
' in pipelines: 00(GETNEXT)'
''
'F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3'
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=04, UNPARTITIONED]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' 02:HASH JOIN [INNER JOIN, BROADCAST]'
' | hash predicates: l_orderkey = o_orderkey'
' | fk/pk conjuncts: l_orderkey = o_orderkey'
' | runtime filters: RF000[bloom] <- o_orderkey'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B spill-buffer=[0-9.]*MB thread-reservation=.*
' | tuple-ids=0,1 row-size=402B cardinality=5.76M'
' | in pipelines: 00(GETNEXT), 01(OPEN)'
' |'
' |--03:EXCHANGE [BROADCAST]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' | tuple-ids=1 row-size=171B cardinality=1.50M'
' | in pipelines: 01(GETNEXT)'
' |'
' 00:SCAN HDFS [tpch.lineitem, RANDOM]'
row_regex:.*partitions=1/1 files=1 size=.*
' runtime filters: RF000[bloom] -> l_orderkey'
' stored statistics:'
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
' columns: all'
row_regex:.*extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
' file formats: [TEXT]'
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1.*
' tuple-ids=0 row-size=231B cardinality=6.00M'
' in pipelines: 00(GETNEXT)'
''
'F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2'
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=03, BROADCAST]'
row_regex:.* | mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
' 01:SCAN HDFS [tpch.orders, RANDOM]'
row_regex:.*partitions=1/1 files=1 size=.*
' stored statistics:'
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
' columns: all'
row_regex:.* extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
' file formats: [TEXT]'
row_regex:.* mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
' tuple-ids=1 row-size=171B cardinality=1.50M'
' in pipelines: 01(GETNEXT)'
====
---- QUERY
# Tests the warning about missing table stats in the explain header.
explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col)
from functional_avro.alltypes t1
inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id)
left outer join functional_avro.alltypes t3 on (t2.id = t3.id)
where t1.month = 1 and t2.year = 2009 and t3.bool_col = false
---- RESULTS: VERIFY_IS_SUBSET
'WARNING: The following tables are missing relevant table and/or column statistics.'
'functional_avro.alltypes, functional_parquet.alltypessmall'
====
---- QUERY
# Tests whether all file formats are present in the explain string.
explain select * from functional.alltypesmixedformat;
---- RESULTS: VERIFY_IS_SUBSET
' file formats: [PARQUET, RC_FILE, SEQUENCE_FILE, TEXT]'
====
---- QUERY
# Tests whether file formats are present in the explain string in case of Iceberg tables.
explain select * from functional_parquet.iceberg_partitioned;
---- RESULTS: VERIFY_IS_SUBSET
' file formats: [PARQUET]'
====
---- QUERY
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain cast exprs
# 'd_date' is STRING type, rewrite succeed
explain
select month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd')) a
from tpcds_parquet.date_dim
group by month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd'))
order by month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd'))
---- RESULTS: VERIFY_IS_SUBSET
'Analyzed query: SELECT month(to_timestamp(d_date, 'yyyy-MM-dd')) a FROM'
'tpcds_parquet.date_dim GROUP BY month(to_timestamp(d_date, 'yyyy-MM-dd')) ORDER'
'BY month(to_timestamp(d_date, 'yyyy-MM-dd')) ASC'
====
---- QUERY
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain aliases
# 'd_date' is STRING type, rewrite succeed
# Aliases in group by clauses will substituted by exprs
explain
select month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd')) a
from tpcds_parquet.date_dim
group by a
order by a
---- RESULTS: VERIFY_IS_SUBSET
'Analyzed query: SELECT month(to_timestamp(d_date, 'yyyy-MM-dd')) a FROM'
'tpcds_parquet.date_dim GROUP BY month(to_timestamp(d_date, 'yyyy-MM-dd')) ORDER'
'BY a ASC'
====
---- QUERY
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain exprs
# 'd_date_sk' is INT type, not rewrite
explain
select month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd')) a
from tpcds_parquet.date_dim
group by month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd'))
order by month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd'))
---- RESULTS: VERIFY_IS_SUBSET
'Analyzed query: SELECT month(to_timestamp(CAST(d_date_sk AS STRING),'
''yyyy-MM-dd')) a FROM tpcds_parquet.date_dim GROUP BY'
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ORDER BY'
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ASC'
====
---- QUERY
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain aliases
# 'd_date_sk' is INT type, not rewrite
# Aliases in group by clauses will substituted by exprs
explain
select month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd')) a
from tpcds_parquet.date_dim
group by a
order by a
---- RESULTS: VERIFY_IS_SUBSET
'Analyzed query: SELECT month(to_timestamp(CAST(d_date_sk AS STRING),'
''yyyy-MM-dd')) a FROM tpcds_parquet.date_dim GROUP BY'
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ORDER BY a ASC'
====