mirror of
https://github.com/apache/impala.git
synced 2026-01-22 18:02:34 -05:00
Enables tests guarded by SkipIfNotHdfsMinicluster to run on Ozone as well as HDFS. Plans are still skipped for Ozone because there's Ozone-specific text in the plan output. Updates explain output to allow for Ozone, which has a block size of 256MB instead of 128MB. One of the partitions read in test_explain is ~180MB, straddling the difference between Ozone and HDFS. Testing: ran affected tests with Ozone. Change-Id: I6b06ceacf951dbc966aa409cf24a310c9676fe7f Reviewed-on: http://gerrit.cloudera.org:8080/19250 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
146 lines
7.1 KiB
Plaintext
146 lines
7.1 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Explain a simple hash join query.
|
|
explain
|
|
select *
|
|
from tpch.lineitem join tpch.orders on l_orderkey = o_orderkey;
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
row_regex:.*Max Per-Host Resource Reservation: Memory=[0-9.]*MB Threads=[0-9]*.*
|
|
row_regex:.*Per-Host Resource Estimates: Memory=[0-9.]*MB.*
|
|
'Analyzed query: SELECT * FROM tpch.lineitem INNER JOIN tpch.orders ON l_orderkey'
|
|
'= o_orderkey'
|
|
''
|
|
'F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
|
|
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1
|
|
' PLAN-ROOT SINK'
|
|
' | output exprs: tpch.lineitem.l_orderkey, tpch.lineitem.l_partkey, tpch.lineitem.l_suppkey, tpch.lineitem.l_linenumber, tpch.lineitem.l_quantity, tpch.lineitem.l_extendedprice, tpch.lineitem.l_discount, tpch.lineitem.l_tax, tpch.lineitem.l_returnflag, tpch.lineitem.l_linestatus, tpch.lineitem.l_shipdate, tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate, tpch.lineitem.l_shipinstruct, tpch.lineitem.l_shipmode, tpch.lineitem.l_comment, tpch.orders.o_orderkey, tpch.orders.o_custkey, tpch.orders.o_orderstatus, tpch.orders.o_totalprice, tpch.orders.o_orderdate, tpch.orders.o_orderpriority, tpch.orders.o_clerk, tpch.orders.o_shippriority, tpch.orders.o_comment'
|
|
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B spill-buffer=[0-9.]*MB thread-reservation=0
|
|
' |'
|
|
' 04:EXCHANGE [UNPARTITIONED]'
|
|
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
|
|
' tuple-ids=0,1 row-size=402B cardinality=5.76M'
|
|
' in pipelines: 00(GETNEXT)'
|
|
''
|
|
'F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3'
|
|
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
|
|
' DATASTREAM SINK [FRAGMENT=F02, EXCHANGE=04, UNPARTITIONED]'
|
|
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
|
|
' 02:HASH JOIN [INNER JOIN, BROADCAST]'
|
|
' | hash predicates: l_orderkey = o_orderkey'
|
|
' | fk/pk conjuncts: l_orderkey = o_orderkey'
|
|
' | runtime filters: RF000[bloom] <- o_orderkey'
|
|
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B spill-buffer=[0-9.]*MB thread-reservation=.*
|
|
' | tuple-ids=0,1 row-size=402B cardinality=5.76M'
|
|
' | in pipelines: 00(GETNEXT), 01(OPEN)'
|
|
' |'
|
|
' |--03:EXCHANGE [BROADCAST]'
|
|
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
|
|
' | tuple-ids=1 row-size=171B cardinality=1.50M'
|
|
' | in pipelines: 01(GETNEXT)'
|
|
' |'
|
|
' 00:SCAN $FILESYSTEM_NAME [tpch.lineitem, RANDOM]'
|
|
row_regex:.*partitions=1/1 files=1 size=.*
|
|
' runtime filters: RF000[bloom] -> l_orderkey'
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
|
|
' columns: all'
|
|
row_regex:.*extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
|
|
' file formats: [TEXT]'
|
|
row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=1.*
|
|
' tuple-ids=0 row-size=231B cardinality=6.00M'
|
|
' in pipelines: 00(GETNEXT)'
|
|
''
|
|
row_regex:.*F01:PLAN FRAGMENT \[RANDOM\] hosts=[1-2] instances=[1-2]
|
|
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
|
|
' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=03, BROADCAST]'
|
|
row_regex:.* | mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
|
|
' 01:SCAN $FILESYSTEM_NAME [tpch.orders, RANDOM]'
|
|
row_regex:.*partitions=1/1 files=1 size=.*
|
|
' stored statistics:'
|
|
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
|
|
' columns: all'
|
|
row_regex:.* extrapolated-rows=disabled max-scan-range-rows=[0-9]*.*
|
|
' file formats: [TEXT]'
|
|
row_regex:.* mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
|
|
' tuple-ids=1 row-size=171B cardinality=1.50M'
|
|
' in pipelines: 01(GETNEXT)'
|
|
====
|
|
---- QUERY
|
|
# Tests the warning about missing table stats in the explain header.
|
|
explain select count(t1.int_col), avg(t2.float_col), sum(t3.bigint_col)
|
|
from functional_avro.alltypes t1
|
|
inner join functional_parquet.alltypessmall t2 on (t1.id = t2.id)
|
|
left outer join functional_avro.alltypes t3 on (t2.id = t3.id)
|
|
where t1.month = 1 and t2.year = 2009 and t3.bool_col = false
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'WARNING: The following tables are missing relevant table and/or column statistics.'
|
|
'functional_avro.alltypes, functional_parquet.alltypessmall'
|
|
====
|
|
---- QUERY
|
|
# Tests whether all file formats are present in the explain string.
|
|
explain select * from functional.alltypesmixedformat;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' file formats: [PARQUET, RC_FILE, SEQUENCE_FILE, TEXT]'
|
|
====
|
|
---- QUERY
|
|
# Tests whether file formats are present in the explain string in case of Iceberg tables.
|
|
explain select * from functional_parquet.iceberg_partitioned;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' file formats: [PARQUET]'
|
|
====
|
|
---- QUERY
|
|
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain cast exprs
|
|
# 'd_date' is STRING type, rewrite succeed
|
|
explain
|
|
select month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd')) a
|
|
from tpcds_parquet.date_dim
|
|
group by month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd'))
|
|
order by month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd'))
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'Analyzed query: SELECT month(to_timestamp(d_date, 'yyyy-MM-dd')) a FROM'
|
|
'tpcds_parquet.date_dim GROUP BY month(to_timestamp(d_date, 'yyyy-MM-dd')) ORDER'
|
|
'BY month(to_timestamp(d_date, 'yyyy-MM-dd')) ASC'
|
|
====
|
|
---- QUERY
|
|
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain aliases
|
|
# 'd_date' is STRING type, rewrite succeed
|
|
# Aliases in group by clauses will substituted by exprs
|
|
explain
|
|
select month(to_timestamp(cast(d_date AS string),'yyyy-MM-dd')) a
|
|
from tpcds_parquet.date_dim
|
|
group by a
|
|
order by a
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'Analyzed query: SELECT month(to_timestamp(d_date, 'yyyy-MM-dd')) a FROM'
|
|
'tpcds_parquet.date_dim GROUP BY month(to_timestamp(d_date, 'yyyy-MM-dd')) ORDER'
|
|
'BY a ASC'
|
|
====
|
|
---- QUERY
|
|
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain exprs
|
|
# 'd_date_sk' is INT type, not rewrite
|
|
explain
|
|
select month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd')) a
|
|
from tpcds_parquet.date_dim
|
|
group by month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd'))
|
|
order by month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd'))
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'Analyzed query: SELECT month(to_timestamp(CAST(d_date_sk AS STRING),'
|
|
''yyyy-MM-dd')) a FROM tpcds_parquet.date_dim GROUP BY'
|
|
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ORDER BY'
|
|
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ASC'
|
|
====
|
|
---- QUERY
|
|
# Tests 'SimplifyCastExprRule' rewrite when group by and order by clauses contain aliases
|
|
# 'd_date_sk' is INT type, not rewrite
|
|
# Aliases in group by clauses will substituted by exprs
|
|
explain
|
|
select month(to_timestamp(cast(d_date_sk AS string),'yyyy-MM-dd')) a
|
|
from tpcds_parquet.date_dim
|
|
group by a
|
|
order by a
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'Analyzed query: SELECT month(to_timestamp(CAST(d_date_sk AS STRING),'
|
|
''yyyy-MM-dd')) a FROM tpcds_parquet.date_dim GROUP BY'
|
|
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ORDER BY a ASC'
|
|
====
|