mirror of
https://github.com/apache/impala.git
synced 2026-01-10 00:00:16 -05:00
We added a new expr rewrite rule 'SimplifyCastExprRule.java' in IMPALA-10836. If expr is not analyzed, this rewrite rule would throw an 'AnalysisException', this is due to 'orderByElements_' not been analyzed. We try to substitute order by elements when creating 'SortInfo', but caused some other problems. So we only add expr analyzed check in this rule to solve this problem. When adding other expr rewrite rules in the future, we should also add this check. Testing: - Added test cases in 'explain-level3.test' Change-Id: I2780e04a6d5a32e224cd0470cf6f166a832363ec Reviewed-on: http://gerrit.cloudera.org:8080/18099 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
146 lines
7.1 KiB
Plaintext
146 lines
7.1 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Explain a simple hash join query.
|
|
explain
|
|
select *
|
|
from tpch.lineitem join tpch.orders on l_orderkey = o_orderkey;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=[0-9.]*MB Threads=[0-9]*.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=[0-9.]*MB.*
|
|
'Analyzed query: SELECT * FROM tpch.lineitem INNER JOIN tpch.orders ON l_orderkey'
|
|
'= o_orderkey'
|
|
''
|
|
'F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1
|
|
' PLAN-ROOT SINK'
|
|
' | output exprs: tpch.lineitem.l_orderkey, tpch.lineitem.l_partkey, tpch.lineitem.l_suppkey, tpch.lineitem.l_linenumber, tpch.lineitem.l_quantity, tpch.lineitem.l_extendedprice, tpch.lineitem.l_discount, tpch.lineitem.l_tax, tpch.lineitem.l_returnflag, tpch.lineitem.l_linestatus, tpch.lineitem.l_shipdate, tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate, tpch.lineitem.l_shipinstruct, tpch.lineitem.l_shipmode, tpch.lineitem.l_comment, tpch.orders.o_orderkey, tpch.orders.o_custkey, tpch.orders.o_orderstatus, tpch.orders.o_totalprice, tpch.orders.o_orderdate, tpch.orders.o_orderpriority, tpch.orders.o_clerk, tpch.orders.o_shippriority, tpch.orders.o_comment'
|
|
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B spill-buffer=[0-9.]*MB thread-reservation=0
|
|
' |'
|
|
' 04:EXCHANGE [UNPARTITIONED]'
|
|
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
|
|
' tuple-ids=0,1 row-size=402B cardinality=5.76M'
|
|
' in pipelines: 00(GETNEXT)'
|
|
''
|
|
'F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3'
|
|
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
|
|
' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=04, UNPARTITIONED]'
|
|
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
|
|
' 02:HASH JOIN [INNER JOIN, BROADCAST]'
|
|
' | hash predicates: l_orderkey = o_orderkey'
|
|
' | fk/pk conjuncts: l_orderkey = o_orderkey'
|
|
' | runtime filters: RF000[bloom] <- o_orderkey'
|
|
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B spill-buffer=[0-9.]*MB thread-reservation=.*
|
|
' | tuple-ids=0,1 row-size=402B cardinality=5.76M'
|
|
' | in pipelines: 00(GETNEXT), 01(OPEN)'
|
|
' |'
|
|
' |--03:EXCHANGE [BROADCAST]'
|
|
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
|
|
' | tuple-ids=1 row-size=171B cardinality=1.50M'
|
|
' | in pipelines: 01(GETNEXT)'
|
|
' |'
|
|
' 00:SCAN HDFS [tpch.lineitem, RANDOM]'
|
|
row_regex:.*partitions=1/1 files=1 size=.*
|
|
' runtime filters: RF000[bloom] -> l_orderkey'
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
|
|
' columns: all'
|
|
row_regex:.*extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
|
|
' file formats: [TEXT]'
|
|
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1.*
|
|
' tuple-ids=0 row-size=231B cardinality=6.00M'
|
|
' in pipelines: 00(GETNEXT)'
|
|
''
|
|
'F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2'
|
|
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
|
|
' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=03, BROADCAST]'
|
|
row_regex:.* | mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
|
|
' 01:SCAN HDFS [tpch.orders, RANDOM]'
|
|
row_regex:.*partitions=1/1 files=1 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
|
|
' file formats: [TEXT]'
|
|
row_regex:.* mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
|
|
' tuple-ids=1 row-size=171B cardinality=1.50M'
|
|
' in pipelines: 01(GETNEXT)'
|
|
====
|
|
---- QUERY
|
|
# Tests the warning about missing table stats in the explain header.
|
|
explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col)
|
|
from functional_avro.alltypes t1
|
|
inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id)
|
|
left outer join functional_avro.alltypes t3 on (t2.id = t3.id)
|
|
where t1.month = 1 and t2.year = 2009 and t3.bool_col = false
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'WARNING: The following tables are missing relevant table and/or column statistics.'
|
|
'functional_avro.alltypes, functional_parquet.alltypessmall'
|
|
====
|
|
---- QUERY
|
|
# Tests whether all file formats are present in the explain string.
|
|
explain select * from functional.alltypesmixedformat;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' file formats: [PARQUET, RC_FILE, SEQUENCE_FILE, TEXT]'
|
|
====
|
|
---- QUERY
|
|
# Tests whether file formats are present in the explain string in case of Iceberg tables.
|
|
explain select * from functional_parquet.iceberg_partitioned;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' file formats: [PARQUET]'
|
|
====
|
|
---- QUERY
|
|
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain cast exprs
|
|
# 'd_date' is STRING type, rewrite succeed
|
|
explain
|
|
select month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd')) a
|
|
from tpcds_parquet.date_dim
|
|
group by month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd'))
|
|
order by month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd'))
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'Analyzed query: SELECT month(to_timestamp(d_date, 'yyyy-MM-dd')) a FROM'
|
|
'tpcds_parquet.date_dim GROUP BY month(to_timestamp(d_date, 'yyyy-MM-dd')) ORDER'
|
|
'BY month(to_timestamp(d_date, 'yyyy-MM-dd')) ASC'
|
|
====
|
|
---- QUERY
|
|
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain aliases
|
|
# 'd_date' is STRING type, rewrite succeed
|
|
# Aliases in group by clauses will substituted by exprs
|
|
explain
|
|
select month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd')) a
|
|
from tpcds_parquet.date_dim
|
|
group by a
|
|
order by a
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'Analyzed query: SELECT month(to_timestamp(d_date, 'yyyy-MM-dd')) a FROM'
|
|
'tpcds_parquet.date_dim GROUP BY month(to_timestamp(d_date, 'yyyy-MM-dd')) ORDER'
|
|
'BY a ASC'
|
|
====
|
|
---- QUERY
|
|
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain exprs
|
|
# 'd_date_sk' is INT type, not rewrite
|
|
explain
|
|
select month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd')) a
|
|
from tpcds_parquet.date_dim
|
|
group by month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd'))
|
|
order by month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd'))
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'Analyzed query: SELECT month(to_timestamp(CAST(d_date_sk AS STRING),'
|
|
''yyyy-MM-dd')) a FROM tpcds_parquet.date_dim GROUP BY'
|
|
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ORDER BY'
|
|
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ASC'
|
|
====
|
|
---- QUERY
|
|
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain aliases
|
|
# 'd_date_sk' is INT type, not rewrite
|
|
# Aliases in group by clauses will substituted by exprs
|
|
explain
|
|
select month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd')) a
|
|
from tpcds_parquet.date_dim
|
|
group by a
|
|
order by a
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'Analyzed query: SELECT month(to_timestamp(CAST(d_date_sk AS STRING),'
|
|
''yyyy-MM-dd')) a FROM tpcds_parquet.date_dim GROUP BY'
|
|
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ORDER BY a ASC'
|
|
====
|