mirror of
https://github.com/apache/impala.git
synced 2026-01-02 12:00:33 -05:00
Add missing coverage for sorting by CHAR and VARCHAR. Add more coverage for spilling sorts. Fix spilling tests: ensure that they actually reliably spill (many of them had memory limits high enough that they could run entirely in memory). I ran this in a loop for a while to flush out flaky tests. The tests should be fairly predictable given that they're not run concurrently with other tests and we allocate enough block manager memory so that each operator can obtain its reservation. Change-Id: Ia2d2627a2c327dcdf269ea3216385b1af9dfa305 Reviewed-on: http://gerrit.cloudera.org:8080/2877 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Internal Jenkins
589 lines
18 KiB
Plaintext
589 lines
18 KiB
Plaintext
====
|
|
---- QUERY
# Group-by count on the BIGINT key l_orderkey under a 25m block manager limit.
# The RUNTIME_PROFILE section of this case expects spilled partitions,
# repartitioning and passed-through rows.
|
|
set max_block_mgr_memory=25m;
|
|
select l_orderkey, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10
|
|
---- RESULTS
|
|
1,6
|
|
2,1
|
|
3,6
|
|
4,1
|
|
5,3
|
|
6,1
|
|
7,7
|
|
32,6
|
|
33,4
|
|
34,3
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
---- RUNTIME_PROFILE
|
|
# Verify that spilling and passthrough were activated.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test query with a string grouping column (l_returnflag) and a string
# aggregate (min(l_shipmode)).
|
|
# Could only get it to spill reliably with num_nodes=1.
|
|
# TODO: revisit with new buffer pool.
# NOTE(review): a later case in this file runs the same query with
# round(avg(l_tax),2); confirm that the unrounded avg() here really prints the
# two-decimal values listed in RESULTS.
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=25m;
|
|
select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode)
|
|
from lineitem
|
|
group by 1,2
|
|
order by 1,2 limit 3
|
|
---- RESULTS
|
|
'A',3,0.05,'RAIL'
|
|
'A',5,0.03,'AIR'
|
|
'A',6,0.03,'TRUCK'
|
|
---- TYPES
|
|
STRING, BIGINT, DECIMAL, STRING
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
# Same group-by count on l_orderkey as an earlier case in this file, here with
# a trailing semicolon on the statement. Runs under a 25m block manager limit;
# the profile checks expect spilling, repartitioning and passthrough.
|
|
set max_block_mgr_memory=25m;
|
|
select l_orderkey, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10;
|
|
---- RESULTS
|
|
1,6
|
|
2,1
|
|
3,6
|
|
4,1
|
|
5,3
|
|
6,1
|
|
7,7
|
|
32,6
|
|
33,4
|
|
34,3
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test query with string grouping column (l_comment).
|
|
# Could only get it to spill reliably with num_nodes=1.
|
|
# TODO: revisit with new buffer pool.
# The result is ordered by the aggregate itself, so only the five most frequent
# comment strings are returned.
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=25m;
|
|
select l_comment, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by count(*) desc limit 5
|
|
---- RESULTS
|
|
' furiously',943
|
|
' carefully',893
|
|
' carefully ',875
|
|
'carefully ',854
|
|
' furiously ',845
|
|
---- TYPES
|
|
STRING, BIGINT
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test query with a string grouping column (l_returnflag) and a string
# aggregate (min(l_shipmode)).
|
|
# Could only get it to spill reliably with num_nodes=1.
|
|
# TODO: revisit with new buffer pool.
# avg(l_tax) is rounded to two decimals, matching the two-decimal values in
# the RESULTS section of this case.
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=25m;
|
|
select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode)
|
|
from lineitem
|
|
group by 1,2
|
|
order by 1,2 limit 3;
|
|
---- RESULTS
|
|
'A',3,0.05,'RAIL'
|
|
'A',5,0.03,'AIR'
|
|
'A',6,0.03,'TRUCK'
|
|
---- TYPES
|
|
STRING, BIGINT, DECIMAL, STRING
|
|
---- RUNTIME_PROFILE
|
|
# Verify that spilling happened in the agg.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test with string intermediate state (avg() uses string intermediate value).
# The averaged column is also the grouping key, so every group's average
# equals its key (RESULTS: 1,1 / 2,2 / ...).
|
|
set max_block_mgr_memory=25m;
|
|
select l_orderkey, avg(l_orderkey)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 5
|
|
---- RESULTS
|
|
1,1
|
|
2,2
|
|
3,3
|
|
4,4
|
|
5,5
|
|
---- TYPES
|
|
BIGINT, DOUBLE
|
|
---- RUNTIME_PROFILE
|
|
# Verify that passthrough and spilling happened in the pre and merge agg.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
# Three-way self-join of lineitem under a 100m block manager limit.
# l1 joins l2 on l_orderkey, and l1 joins l3 on (l_orderkey, l_comment,
# l_shipdate); l1 and l2 are additionally filtered on l_tax.
# The profile check for this case expects at least one join to spill.
# NOTE(review): num_nodes=0 presumably selects multi-node execution, in
# contrast to the num_nodes=1 cases above - confirm against the NUM_NODES docs.
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=100m;
|
|
select count(l1.l_tax)
|
|
from
|
|
lineitem l1,
|
|
lineitem l2,
|
|
lineitem l3
|
|
where
|
|
l1.l_tax < 0.01 and
|
|
l2.l_tax < 0.04 and
|
|
l1.l_orderkey = l2.l_orderkey and
|
|
l1.l_orderkey = l3.l_orderkey and
|
|
l1.l_comment = l3.l_comment and
|
|
l1.l_shipdate = l3.l_shipdate
|
|
---- RESULTS
|
|
1846743
|
|
---- TYPES
|
|
BIGINT
|
|
---- RUNTIME_PROFILE
|
|
# Verify that at least one of the joins was spilled.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
# Analytic function test: the inline view attaches count(*) over () (empty
# OVER clause, i.e. one window spanning all rows) to every lineitem row, and
# the outer query reduces that to a single row with max().
# Runs under a 40m block manager limit; the profile check uses a non-zero
# PinTime as indirect evidence that the analytic spilled.
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=40m;
|
|
select max(t1.total_count), max(t1.l_shipinstruct), max(t1.l_comment) from
|
|
(select l_shipinstruct, l_comment, count(*) over () total_count from lineitem) t1
|
|
---- RESULTS
|
|
6001215,'TAKE BACK RETURN','zzle? slyly final platelets sleep quickly. '
|
|
---- TYPES
|
|
BIGINT, STRING, STRING
|
|
---- RUNTIME_PROFILE
|
|
# Indirectly verify that the analytic spilled: if it spills a block, it must repin it.
|
|
row_regex: .*PinTime: [1-9][0-9]*.*
|
|
====
|
|
---- QUERY
|
|
# Run this query with very low memory. Since the tables are small, the PA/PHJ
# (partitioned aggregation / partitioned hash join) should be
|
|
# using buffers much smaller than the io buffer.
# The query joins functional.alltypessmall to itself three times on id and
# groups by int_col; the profile check expects SpilledPartitions to be 0.
|
|
set max_block_mgr_memory=10m;
|
|
select a.int_col, count(*)
|
|
from functional.alltypessmall a, functional.alltypessmall b, functional.alltypessmall c
|
|
where a.id = b.id and b.id = c.id group by a.int_col
|
|
---- RESULTS
|
|
0,12
|
|
1,12
|
|
2,12
|
|
3,12
|
|
4,12
|
|
5,8
|
|
6,8
|
|
7,8
|
|
8,8
|
|
9,8
|
|
---- TYPES
|
|
INT, BIGINT
|
|
---- RUNTIME_PROFILE
|
|
# This query is not meant to spill.
|
|
row_regex: .*SpilledPartitions: 0 .*
|
|
====
|
|
---- QUERY: TPCH-Q21
|
|
# Adding TPCH-Q21 in the spilling test to check for IMPALA-1471 (spilling left anti
|
|
# and left outer joins were returning wrong results).
|
|
# Q21 - Suppliers Who Kept Orders Waiting Query
# Counts, per supplier in SAUDI ARABIA, the late line items (receipt date after
# commit date) on orders with status 'F' where
#   - exists (l2): another supplier also has a line item on the same order, and
#   - not exists (l3): no other supplier on that order was late.
# Ordering is by numwait desc, then s_name, with the output capped at 100 rows.
# Runs under a 65m block manager limit; the profile check expects at least one
# join to spill.
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=65m;
|
|
select
|
|
s_name,
|
|
count(*) as numwait
|
|
from
|
|
supplier,
|
|
lineitem l1,
|
|
orders,
|
|
nation
|
|
where
|
|
s_suppkey = l1.l_suppkey
|
|
and o_orderkey = l1.l_orderkey
|
|
and o_orderstatus = 'F'
|
|
and l1.l_receiptdate > l1.l_commitdate
|
|
and exists (
|
|
select
|
|
*
|
|
from
|
|
lineitem l2
|
|
where
|
|
l2.l_orderkey = l1.l_orderkey
|
|
and l2.l_suppkey <> l1.l_suppkey
|
|
)
|
|
and not exists (
|
|
select
|
|
*
|
|
from
|
|
lineitem l3
|
|
where
|
|
l3.l_orderkey = l1.l_orderkey
|
|
and l3.l_suppkey <> l1.l_suppkey
|
|
and l3.l_receiptdate > l3.l_commitdate
|
|
)
|
|
and s_nationkey = n_nationkey
|
|
and n_name = 'SAUDI ARABIA'
|
|
group by
|
|
s_name
|
|
order by
|
|
numwait desc,
|
|
s_name
|
|
limit 100
|
|
---- RESULTS
|
|
'Supplier#000002829',20
|
|
'Supplier#000005808',18
|
|
'Supplier#000000262',17
|
|
'Supplier#000000496',17
|
|
'Supplier#000002160',17
|
|
'Supplier#000002301',17
|
|
'Supplier#000002540',17
|
|
'Supplier#000003063',17
|
|
'Supplier#000005178',17
|
|
'Supplier#000008331',17
|
|
'Supplier#000002005',16
|
|
'Supplier#000002095',16
|
|
'Supplier#000005799',16
|
|
'Supplier#000005842',16
|
|
'Supplier#000006450',16
|
|
'Supplier#000006939',16
|
|
'Supplier#000009200',16
|
|
'Supplier#000009727',16
|
|
'Supplier#000000486',15
|
|
'Supplier#000000565',15
|
|
'Supplier#000001046',15
|
|
'Supplier#000001047',15
|
|
'Supplier#000001161',15
|
|
'Supplier#000001336',15
|
|
'Supplier#000001435',15
|
|
'Supplier#000003075',15
|
|
'Supplier#000003335',15
|
|
'Supplier#000005649',15
|
|
'Supplier#000006027',15
|
|
'Supplier#000006795',15
|
|
'Supplier#000006800',15
|
|
'Supplier#000006824',15
|
|
'Supplier#000007131',15
|
|
'Supplier#000007382',15
|
|
'Supplier#000008913',15
|
|
'Supplier#000009787',15
|
|
'Supplier#000000633',14
|
|
'Supplier#000001960',14
|
|
'Supplier#000002323',14
|
|
'Supplier#000002490',14
|
|
'Supplier#000002993',14
|
|
'Supplier#000003101',14
|
|
'Supplier#000004489',14
|
|
'Supplier#000005435',14
|
|
'Supplier#000005583',14
|
|
'Supplier#000005774',14
|
|
'Supplier#000007579',14
|
|
'Supplier#000008180',14
|
|
'Supplier#000008695',14
|
|
'Supplier#000009224',14
|
|
'Supplier#000000357',13
|
|
'Supplier#000000436',13
|
|
'Supplier#000000610',13
|
|
'Supplier#000000788',13
|
|
'Supplier#000000889',13
|
|
'Supplier#000001062',13
|
|
'Supplier#000001498',13
|
|
'Supplier#000002056',13
|
|
'Supplier#000002312',13
|
|
'Supplier#000002344',13
|
|
'Supplier#000002596',13
|
|
'Supplier#000002615',13
|
|
'Supplier#000002978',13
|
|
'Supplier#000003048',13
|
|
'Supplier#000003234',13
|
|
'Supplier#000003727',13
|
|
'Supplier#000003806',13
|
|
'Supplier#000004472',13
|
|
'Supplier#000005236',13
|
|
'Supplier#000005906',13
|
|
'Supplier#000006241',13
|
|
'Supplier#000006326',13
|
|
'Supplier#000006384',13
|
|
'Supplier#000006394',13
|
|
'Supplier#000006624',13
|
|
'Supplier#000006629',13
|
|
'Supplier#000006682',13
|
|
'Supplier#000006737',13
|
|
'Supplier#000006825',13
|
|
'Supplier#000007021',13
|
|
'Supplier#000007417',13
|
|
'Supplier#000007497',13
|
|
'Supplier#000007602',13
|
|
'Supplier#000008134',13
|
|
'Supplier#000008234',13
|
|
'Supplier#000009435',13
|
|
'Supplier#000009436',13
|
|
'Supplier#000009564',13
|
|
'Supplier#000009896',13
|
|
'Supplier#000000379',12
|
|
'Supplier#000000673',12
|
|
'Supplier#000000762',12
|
|
'Supplier#000000811',12
|
|
'Supplier#000000821',12
|
|
'Supplier#000001337',12
|
|
'Supplier#000001916',12
|
|
'Supplier#000001925',12
|
|
'Supplier#000002039',12
|
|
'Supplier#000002357',12
|
|
'Supplier#000002483',12
|
|
---- TYPES
|
|
STRING, BIGINT
|
|
---- RUNTIME_PROFILE
|
|
# Verify that at least one of the joins was spilled.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test aggregation spill with group_concat distinct
# group_concat uses '|' as its separator. Every group in RESULTS shows a single
# l_linestatus value, so the separator never appears in the expected output.
# The profile check expects at least one of the aggregations to spill.
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=100m;
|
|
select l_orderkey, count(*), group_concat(distinct l_linestatus, '|')
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10
|
|
---- RESULTS
|
|
1,6,'O'
|
|
2,1,'O'
|
|
3,6,'F'
|
|
4,1,'O'
|
|
5,3,'F'
|
|
6,1,'F'
|
|
7,7,'O'
|
|
32,6,'O'
|
|
33,4,'F'
|
|
34,3,'O'
|
|
---- TYPES
|
|
BIGINT, BIGINT, STRING
|
|
---- RUNTIME_PROFILE
|
|
# Verify that at least one of the aggs spilled.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-2612. The following query will cause CastToChar
|
|
# to be invoked when building the hash tables in partitioned aggregation
|
|
# nodes. CastToChar will do "local" memory allocation. Without the fix of
|
|
# IMPALA-2612, the peak memory consumption will be higher.
# The distinct key is the concatenation of six char(120) casts of l_comment.
# The query runs on a single node with a single scanner thread under an 800m
# mem_limit; the profile check expects the aggregation to spill.
# NOTE(review): presumably 800m is low enough that the query fails without the
# fix - confirm against the IMPALA-2612 change.
|
|
set mem_limit=800m;
|
|
set num_nodes=1;
|
|
set num_scanner_threads=1;
|
|
select count(distinct concat(cast(l_comment as char(120)), cast(l_comment as char(120)),
|
|
cast(l_comment as char(120)), cast(l_comment as char(120)),
|
|
cast(l_comment as char(120)), cast(l_comment as char(120))))
|
|
from lineitem
|
|
---- RESULTS
|
|
4502054
|
|
---- TYPES
|
|
BIGINT
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the agg spilled.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test sort with inlined char column materialized by exprs.
# The cast to char(50) appears directly in the select list of the sorted query.
|
|
# Set low memory limit to force spilling.
# The limit that is active here is max_block_mgr_memory=4m; the profile check
# expects TotalMergesPerformed to be non-zero.
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=4m;
|
|
# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption.
|
|
# When IMPALA-3332 is fixed, can reenable this memory limit.
|
|
#set mem_limit=200m;
# NOTE(review): disable_outermost_topn=1 presumably stops "order by ... limit"
# from being planned as a top-n so that a full sort runs - confirm.
|
|
set disable_outermost_topn=1;
|
|
select cast(l_comment as char(50))
|
|
from lineitem
|
|
order by 1
|
|
limit 20;
|
|
---- RESULTS
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias about the en '
|
|
' Tiresias about the slyly ironic dinos ca '
|
|
' Tiresias about the slyly unus '
|
|
' Tiresias above '
|
|
' Tiresias above the fox '
|
|
' Tiresias above the furiously final th '
|
|
' Tiresias above the slyly expr '
|
|
' Tiresias above the stealthily p '
|
|
---- TYPES
|
|
CHAR
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the sort actually spilled
|
|
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test sort with input inlined char column materialized before sort.
# The cast to char(50) happens in an inline view, and the outer query sorts the
# resulting char_col column.
# Set low memory limit to force spilling: mem_limit=200m plus a 4m block
# manager limit. The profile check expects TotalMergesPerformed to be non-zero.
|
|
set num_nodes=0;
|
|
set mem_limit=200m;
|
|
set max_block_mgr_memory=4m;
|
|
set disable_outermost_topn=1;
|
|
select char_col
|
|
from (select cast(l_comment as char(50)) char_col
|
|
from lineitem) subquery
|
|
order by 1
|
|
limit 20;
|
|
---- RESULTS
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias about the en '
|
|
' Tiresias about the slyly ironic dinos ca '
|
|
' Tiresias about the slyly unus '
|
|
' Tiresias above '
|
|
' Tiresias above the fox '
|
|
' Tiresias above the furiously final th '
|
|
' Tiresias above the slyly expr '
|
|
' Tiresias above the stealthily p '
|
|
---- TYPES
|
|
CHAR
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the sort actually spilled
|
|
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test sort with input non-inlined char column materialized before sort.
# This case casts to char(200), where the inlined-char cases in this file use
# char(50).
# NOTE(review): presumably char(200) exceeds the length up to which CHAR values
# are stored inline in the tuple - confirm.
|
|
# Set low memory limit to force spilling.
|
|
set num_nodes=0;
|
|
set mem_limit=200m;
|
|
set max_block_mgr_memory=4m;
|
|
set disable_outermost_topn=1;
|
|
select char_col
|
|
from (select cast(l_comment as char(200)) char_col
|
|
from lineitem) subquery
|
|
order by 1
|
|
limit 20;
|
|
---- RESULTS
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias about the en '
|
|
' Tiresias about the slyly ironic dinos ca '
|
|
' Tiresias about the slyly unus '
|
|
' Tiresias above '
|
|
' Tiresias above the fox '
|
|
' Tiresias above the furiously final th '
|
|
' Tiresias above the slyly expr '
|
|
' Tiresias above the stealthily p '
|
|
---- TYPES
|
|
CHAR
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the sort actually spilled
|
|
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test sort with varchar column materialized by exprs.
# The cast to varchar(50) appears directly in the select list of the sorted
# query; the TYPES section of this case lists the column as STRING.
|
|
# Set low memory limit to force spilling.
# The limit that is active here is max_block_mgr_memory=4m; the profile check
# expects TotalMergesPerformed to be non-zero.
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=4m;
|
|
# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption.
|
|
# When IMPALA-3332 is fixed, can reenable this memory limit.
|
|
#set mem_limit=200m;
|
|
set disable_outermost_topn=1;
|
|
select cast(l_comment as varchar(50))
|
|
from lineitem
|
|
order by 1
|
|
limit 20;
|
|
---- RESULTS
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias about the en'
|
|
' Tiresias about the slyly ironic dinos ca'
|
|
' Tiresias about the slyly unus'
|
|
' Tiresias above'
|
|
' Tiresias above the fox'
|
|
' Tiresias above the furiously final th'
|
|
' Tiresias above the slyly expr'
|
|
' Tiresias above the stealthily p'
|
|
---- TYPES
|
|
STRING
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the sort actually spilled
|
|
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test sort with input varchar column materialized before sort.
# The cast to varchar(50) happens in an inline view. The view column is still
# named char_col even though it holds a varchar value.
|
|
# Set low memory limit to force spilling.
|
|
set num_nodes=0;
|
|
set mem_limit=200m;
|
|
set max_block_mgr_memory=4m;
|
|
set disable_outermost_topn=1;
|
|
select char_col
|
|
from (select cast(l_comment as varchar(50)) char_col
|
|
from lineitem) subquery
|
|
order by 1
|
|
limit 20;
|
|
---- RESULTS
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias about the en'
|
|
' Tiresias about the slyly ironic dinos ca'
|
|
' Tiresias about the slyly unus'
|
|
' Tiresias above'
|
|
' Tiresias above the fox'
|
|
' Tiresias above the furiously final th'
|
|
' Tiresias above the slyly expr'
|
|
' Tiresias above the stealthily p'
|
|
---- TYPES
|
|
STRING
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the sort actually spilled
|
|
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
|
|
====
|