Files
impala/testdata/workloads/functional-query/queries/QueryTest/spilling.test
Tim Armstrong 34c95c9590 IMPALA-2345,2991: test coverage for spilling and sorts
Add missing coverage for sorting by CHAR and VARCHAR.

Add more coverage for spilling sorts.

Fix spilling tests: ensure that they actually reliably spill (many of
them had memory limits high enough that they could run entirely in
memory).

I ran this in a loop for a while to flush out flaky tests. The tests
should be fairly predictable given that they're not run concurrently
with other tests and we allocate enough block manager memory so that
each operator can obtain its reservation.

Change-Id: Ia2d2627a2c327dcdf269ea3216385b1af9dfa305
Reviewed-on: http://gerrit.cloudera.org:8080/2877
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Internal Jenkins
2016-05-12 14:17:55 -07:00

589 lines
18 KiB
Plaintext

====
---- QUERY
# Grouping aggregation over lineitem keyed on l_orderkey, run with a 25m block
# manager limit.
set max_block_mgr_memory=25m;
SELECT l_orderkey, COUNT(*)
FROM lineitem
GROUP BY l_orderkey
ORDER BY l_orderkey
LIMIT 10
---- RESULTS
1,6
2,1
3,6
4,1
5,3
6,1
7,7
32,6
33,4
34,3
---- TYPES
BIGINT, BIGINT
---- RUNTIME_PROFILE
# Verify that spilling and passthrough were activated.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Aggregation with a string grouping column (l_returnflag) and a string aggregate
# (min of l_shipmode).
# Spilling was only reliable with num_nodes=1.
# TODO: revisit with new buffer pool.
# NOTE(review): a later test case in this file runs the same query with
# round(avg(l_tax),2) - confirm whether both variants are needed.
set num_nodes=1;
set max_block_mgr_memory=25m;
SELECT l_returnflag, l_orderkey, AVG(l_tax), MIN(l_shipmode)
FROM lineitem
GROUP BY l_returnflag, l_orderkey
ORDER BY l_returnflag, l_orderkey
LIMIT 3
---- RESULTS
'A',3,0.05,'RAIL'
'A',5,0.03,'AIR'
'A',6,0.03,'TRUCK'
---- TYPES
STRING, BIGINT, DECIMAL, STRING
---- RUNTIME_PROFILE
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Grouping aggregation over lineitem keyed on l_orderkey, run with a 25m block
# manager limit.
# NOTE(review): this looks identical to an earlier test case in this file - confirm
# whether the repetition is intentional.
set max_block_mgr_memory=25m;
SELECT l_orderkey, COUNT(*)
FROM lineitem
GROUP BY l_orderkey
ORDER BY l_orderkey
LIMIT 10;
---- RESULTS
1,6
2,1
3,6
4,1
5,3
6,1
7,7
32,6
33,4
34,3
---- TYPES
BIGINT, BIGINT
---- RUNTIME_PROFILE
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Aggregation grouped by a string column (l_comment).
# Spilling was only reliable with num_nodes=1.
# TODO: revisit with new buffer pool.
set num_nodes=1;
set max_block_mgr_memory=25m;
SELECT l_comment, COUNT(*)
FROM lineitem
GROUP BY l_comment
ORDER BY COUNT(*) DESC
LIMIT 5
---- RESULTS
' furiously',943
' carefully',893
' carefully ',875
'carefully ',854
' furiously ',845
---- TYPES
STRING, BIGINT
---- RUNTIME_PROFILE
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Aggregation with a string grouping column (l_returnflag) and a string aggregate
# (min of l_shipmode). The average is rounded to two decimal places.
# Spilling was only reliable with num_nodes=1.
# TODO: revisit with new buffer pool.
set num_nodes=1;
set max_block_mgr_memory=25m;
SELECT l_returnflag, l_orderkey, ROUND(AVG(l_tax), 2), MIN(l_shipmode)
FROM lineitem
GROUP BY l_returnflag, l_orderkey
ORDER BY l_returnflag, l_orderkey
LIMIT 3;
---- RESULTS
'A',3,0.05,'RAIL'
'A',5,0.03,'AIR'
'A',6,0.03,'TRUCK'
---- TYPES
STRING, BIGINT, DECIMAL, STRING
---- RUNTIME_PROFILE
# Verify that spilling happened in the agg.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Aggregation with string intermediate state: avg() uses a string intermediate value.
set max_block_mgr_memory=25m;
SELECT l_orderkey, AVG(l_orderkey)
FROM lineitem
GROUP BY l_orderkey
ORDER BY l_orderkey
LIMIT 5
---- RESULTS
1,1
2,2
3,3
4,4
5,5
---- TYPES
BIGINT, DOUBLE
---- RUNTIME_PROFILE
# Verify that passthrough and spilling happened in the pre and merge agg.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Three-way self-join of lineitem, run with a 100m block manager limit.
set num_nodes=0;
set max_block_mgr_memory=100m;
SELECT COUNT(l1.l_tax)
FROM
  lineitem AS l1,
  lineitem AS l2,
  lineitem AS l3
WHERE
  l1.l_tax < 0.01
  AND l2.l_tax < 0.04
  AND l1.l_orderkey = l2.l_orderkey
  AND l1.l_orderkey = l3.l_orderkey
  AND l1.l_comment = l3.l_comment
  AND l1.l_shipdate = l3.l_shipdate
---- RESULTS
1846743
---- TYPES
BIGINT
---- RUNTIME_PROFILE
# Verify that at least one of the joins was spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Analytic count(*) with an empty OVER clause across lineitem, reduced to a single
# row by max() aggregates in the outer query.
set num_nodes=0;
set max_block_mgr_memory=40m;
SELECT MAX(t1.total_count), MAX(t1.l_shipinstruct), MAX(t1.l_comment)
FROM (
  SELECT l_shipinstruct, l_comment, COUNT(*) OVER () AS total_count
  FROM lineitem
) AS t1
---- RESULTS
6001215,'TAKE BACK RETURN','zzle? slyly final platelets sleep quickly. '
---- TYPES
BIGINT, STRING, STRING
---- RUNTIME_PROFILE
# Indirectly verify that the analytic spilled: if it spills a block, it must repin it.
row_regex: .*PinTime: [1-9][0-9]*.*
====
---- QUERY
# Very low memory run. The tables are small, so the PA/PHJ should be using buffers
# much smaller than the io buffer.
set max_block_mgr_memory=10m;
SELECT a.int_col, COUNT(*)
FROM
  functional.alltypessmall AS a,
  functional.alltypessmall AS b,
  functional.alltypessmall AS c
WHERE a.id = b.id
  AND b.id = c.id
GROUP BY a.int_col
---- RESULTS
0,12
1,12
2,12
3,12
4,12
5,8
6,8
7,8
8,8
9,8
---- TYPES
INT, BIGINT
---- RUNTIME_PROFILE
# This query is not meant to spill.
row_regex: .*SpilledPartitions: 0 .*
====
---- QUERY: TPCH-Q21
# TPCH-Q21 (Suppliers Who Kept Orders Waiting Query), included in the spilling tests
# as a check for IMPALA-1471 (spilling left anti and left outer joins were returning
# wrong results).
set num_nodes=0;
set max_block_mgr_memory=65m;
SELECT
  s_name,
  COUNT(*) AS numwait
FROM
  supplier,
  lineitem AS l1,
  orders,
  nation
WHERE
  s_suppkey = l1.l_suppkey
  AND o_orderkey = l1.l_orderkey
  AND o_orderstatus = 'F'
  AND l1.l_receiptdate > l1.l_commitdate
  AND EXISTS (
    SELECT
      *
    FROM
      lineitem AS l2
    WHERE
      l2.l_orderkey = l1.l_orderkey
      AND l2.l_suppkey <> l1.l_suppkey
  )
  AND NOT EXISTS (
    SELECT
      *
    FROM
      lineitem AS l3
    WHERE
      l3.l_orderkey = l1.l_orderkey
      AND l3.l_suppkey <> l1.l_suppkey
      AND l3.l_receiptdate > l3.l_commitdate
  )
  AND s_nationkey = n_nationkey
  AND n_name = 'SAUDI ARABIA'
GROUP BY
  s_name
ORDER BY
  numwait DESC,
  s_name
LIMIT 100
---- RESULTS
'Supplier#000002829',20
'Supplier#000005808',18
'Supplier#000000262',17
'Supplier#000000496',17
'Supplier#000002160',17
'Supplier#000002301',17
'Supplier#000002540',17
'Supplier#000003063',17
'Supplier#000005178',17
'Supplier#000008331',17
'Supplier#000002005',16
'Supplier#000002095',16
'Supplier#000005799',16
'Supplier#000005842',16
'Supplier#000006450',16
'Supplier#000006939',16
'Supplier#000009200',16
'Supplier#000009727',16
'Supplier#000000486',15
'Supplier#000000565',15
'Supplier#000001046',15
'Supplier#000001047',15
'Supplier#000001161',15
'Supplier#000001336',15
'Supplier#000001435',15
'Supplier#000003075',15
'Supplier#000003335',15
'Supplier#000005649',15
'Supplier#000006027',15
'Supplier#000006795',15
'Supplier#000006800',15
'Supplier#000006824',15
'Supplier#000007131',15
'Supplier#000007382',15
'Supplier#000008913',15
'Supplier#000009787',15
'Supplier#000000633',14
'Supplier#000001960',14
'Supplier#000002323',14
'Supplier#000002490',14
'Supplier#000002993',14
'Supplier#000003101',14
'Supplier#000004489',14
'Supplier#000005435',14
'Supplier#000005583',14
'Supplier#000005774',14
'Supplier#000007579',14
'Supplier#000008180',14
'Supplier#000008695',14
'Supplier#000009224',14
'Supplier#000000357',13
'Supplier#000000436',13
'Supplier#000000610',13
'Supplier#000000788',13
'Supplier#000000889',13
'Supplier#000001062',13
'Supplier#000001498',13
'Supplier#000002056',13
'Supplier#000002312',13
'Supplier#000002344',13
'Supplier#000002596',13
'Supplier#000002615',13
'Supplier#000002978',13
'Supplier#000003048',13
'Supplier#000003234',13
'Supplier#000003727',13
'Supplier#000003806',13
'Supplier#000004472',13
'Supplier#000005236',13
'Supplier#000005906',13
'Supplier#000006241',13
'Supplier#000006326',13
'Supplier#000006384',13
'Supplier#000006394',13
'Supplier#000006624',13
'Supplier#000006629',13
'Supplier#000006682',13
'Supplier#000006737',13
'Supplier#000006825',13
'Supplier#000007021',13
'Supplier#000007417',13
'Supplier#000007497',13
'Supplier#000007602',13
'Supplier#000008134',13
'Supplier#000008234',13
'Supplier#000009435',13
'Supplier#000009436',13
'Supplier#000009564',13
'Supplier#000009896',13
'Supplier#000000379',12
'Supplier#000000673',12
'Supplier#000000762',12
'Supplier#000000811',12
'Supplier#000000821',12
'Supplier#000001337',12
'Supplier#000001916',12
'Supplier#000001925',12
'Supplier#000002039',12
'Supplier#000002357',12
'Supplier#000002483',12
---- TYPES
STRING, BIGINT
---- RUNTIME_PROFILE
# Verify that at least one of the joins was spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Aggregation spill test using group_concat(distinct ...).
set num_nodes=1;
set max_block_mgr_memory=100m;
SELECT l_orderkey, COUNT(*), GROUP_CONCAT(DISTINCT l_linestatus, '|')
FROM lineitem
GROUP BY l_orderkey
ORDER BY l_orderkey
LIMIT 10
---- RESULTS
1,6,'O'
2,1,'O'
3,6,'F'
4,1,'O'
5,3,'F'
6,1,'F'
7,7,'O'
32,6,'O'
33,4,'F'
34,3,'O'
---- TYPES
BIGINT, BIGINT, STRING
---- RUNTIME_PROFILE
# Verify that at least one of the aggs spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Regression test for IMPALA-2612. Building the hash tables in the partitioned
# aggregation nodes for this query invokes CastToChar, which does "local" memory
# allocation. Without the fix of IMPALA-2612, the peak memory consumption will be
# higher.
set mem_limit=800m;
set num_nodes=1;
set num_scanner_threads=1;
SELECT COUNT(DISTINCT CONCAT(
  CAST(l_comment AS CHAR(120)), CAST(l_comment AS CHAR(120)),
  CAST(l_comment AS CHAR(120)), CAST(l_comment AS CHAR(120)),
  CAST(l_comment AS CHAR(120)), CAST(l_comment AS CHAR(120))))
FROM lineitem
---- RESULTS
4502054
---- TYPES
BIGINT
---- RUNTIME_PROFILE
# Verify that the agg spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Sort on an inlined char column that is materialized by exprs.
# The low memory limit is there to force spilling.
set num_nodes=0;
set max_block_mgr_memory=4m;
# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption.
# When IMPALA-3332 is fixed, can reenable this memory limit.
#set mem_limit=200m;
set disable_outermost_topn=1;
SELECT CAST(l_comment AS CHAR(50))
FROM lineitem
ORDER BY 1
LIMIT 20;
---- RESULTS
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias about the en '
' Tiresias about the slyly ironic dinos ca '
' Tiresias about the slyly unus '
' Tiresias above '
' Tiresias above the fox '
' Tiresias above the furiously final th '
' Tiresias above the slyly expr '
' Tiresias above the stealthily p '
---- TYPES
CHAR
---- RUNTIME_PROFILE
# Verify that the sort actually spilled
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
====
---- QUERY
# Sort whose input is an inlined char column materialized before the sort.
set num_nodes=0;
set mem_limit=200m;
set max_block_mgr_memory=4m;
set disable_outermost_topn=1;
SELECT char_col
FROM (
  SELECT CAST(l_comment AS CHAR(50)) AS char_col
  FROM lineitem
) AS subquery
ORDER BY char_col
LIMIT 20;
---- RESULTS
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias about the en '
' Tiresias about the slyly ironic dinos ca '
' Tiresias about the slyly unus '
' Tiresias above '
' Tiresias above the fox '
' Tiresias above the furiously final th '
' Tiresias above the slyly expr '
' Tiresias above the stealthily p '
---- TYPES
CHAR
---- RUNTIME_PROFILE
# Verify that the sort actually spilled
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
====
---- QUERY
# Sort whose input is a non-inlined char column materialized before the sort.
# The low memory limit is there to force spilling.
set num_nodes=0;
set mem_limit=200m;
set max_block_mgr_memory=4m;
set disable_outermost_topn=1;
SELECT char_col
FROM (
  SELECT CAST(l_comment AS CHAR(200)) AS char_col
  FROM lineitem
) AS subquery
ORDER BY char_col
LIMIT 20;
---- RESULTS
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias about the en '
' Tiresias about the slyly ironic dinos ca '
' Tiresias about the slyly unus '
' Tiresias above '
' Tiresias above the fox '
' Tiresias above the furiously final th '
' Tiresias above the slyly expr '
' Tiresias above the stealthily p '
---- TYPES
CHAR
---- RUNTIME_PROFILE
# Verify that the sort actually spilled
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
====
---- QUERY
# Sort on a varchar column that is materialized by exprs.
# The low memory limit is there to force spilling.
set num_nodes=0;
set max_block_mgr_memory=4m;
# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption.
# When IMPALA-3332 is fixed, can reenable this memory limit.
#set mem_limit=200m;
set disable_outermost_topn=1;
SELECT CAST(l_comment AS VARCHAR(50))
FROM lineitem
ORDER BY 1
LIMIT 20;
---- RESULTS
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias about the en'
' Tiresias about the slyly ironic dinos ca'
' Tiresias about the slyly unus'
' Tiresias above'
' Tiresias above the fox'
' Tiresias above the furiously final th'
' Tiresias above the slyly expr'
' Tiresias above the stealthily p'
---- TYPES
STRING
---- RUNTIME_PROFILE
# Verify that the sort actually spilled
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
====
---- QUERY
# Sort whose input is a varchar column materialized before the sort.
# The low memory limit is there to force spilling.
set num_nodes=0;
set mem_limit=200m;
set max_block_mgr_memory=4m;
set disable_outermost_topn=1;
SELECT varchar_col
FROM (
  SELECT CAST(l_comment AS VARCHAR(50)) AS varchar_col
  FROM lineitem
) AS subquery
ORDER BY varchar_col
LIMIT 20;
---- RESULTS
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias about the en'
' Tiresias about the slyly ironic dinos ca'
' Tiresias about the slyly unus'
' Tiresias above'
' Tiresias above the fox'
' Tiresias above the furiously final th'
' Tiresias above the slyly expr'
' Tiresias above the stealthily p'
---- TYPES
STRING
---- RUNTIME_PROFILE
# Verify that the sort actually spilled
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
====