Files
impala/testdata/workloads/functional-query/queries/QueryTest/spilling.test
Michael Ho f7501d2ec1 IMPALA-3332: Free local allocations in sorter.
Sorter can have runaway memory consumption as it never frees
local allocations made in comparator_.Less(). In addition, it
doesn't check for errors generated during expression evaluation
so it may keep sorting even after failures have occurred.

This change fixes the problem by freeing local allocations for
every n invocations of comparator_.Less() where n is the row
batch size specified in the query options. Various error checks
are also added to return early if any error is encountered.

Change-Id: I941729b4836e5dbb827d4313a0b45bc5df2fa8e1
Reviewed-on: http://gerrit.cloudera.org:8080/3116
Reviewed-by: Michael Ho <kwho@cloudera.com>
Tested-by: Internal Jenkins
2016-05-23 08:40:18 -07:00

587 lines
18 KiB
Plaintext

====
---- QUERY
# Grouping aggregation keyed on l_orderkey, run with a 25m block manager limit.
# The RUNTIME_PROFILE section of this test expects spilling, repartitioning and
# passthrough to show up in the profile.
set max_block_mgr_memory=25m;
select
  l_orderkey,
  count(*)
from
  lineitem
group by
  l_orderkey
order by
  l_orderkey
limit 10
---- RESULTS
1,6
2,1
3,6
4,1
5,3
6,1
7,7
32,6
33,4
34,3
---- TYPES
BIGINT, BIGINT
---- RUNTIME_PROFILE
# Verify that spilling and passthrough were activated.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Aggregation with a string grouping column (l_returnflag) and a string aggregate
# (min(l_shipmode)).
# Spilling was only reliable with num_nodes=1.
# TODO: revisit with new buffer pool.
set num_nodes=1;
set max_block_mgr_memory=25m;
select
  l_returnflag,
  l_orderkey,
  avg(l_tax),
  min(l_shipmode)
from
  lineitem
group by
  l_returnflag,
  l_orderkey
order by
  l_returnflag,
  l_orderkey
limit 3
---- RESULTS
'A',3,0.05,'RAIL'
'A',5,0.03,'AIR'
'A',6,0.03,'TRUCK'
---- TYPES
STRING, BIGINT, DECIMAL, STRING
---- RUNTIME_PROFILE
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Count of rows per l_orderkey under a 25m block manager limit; the statement is
# terminated with a semicolon.
set max_block_mgr_memory=25m;
select
  l_orderkey,
  count(*)
from
  lineitem
group by
  l_orderkey
order by
  l_orderkey
limit 10;
---- RESULTS
1,6
2,1
3,6
4,1
5,3
6,1
7,7
32,6
33,4
34,3
---- TYPES
BIGINT, BIGINT
---- RUNTIME_PROFILE
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Aggregation grouped on a string column (l_comment), returning the five most
# frequent values.
# Spilling was only reliable with num_nodes=1.
# TODO: revisit with new buffer pool.
set num_nodes=1;
set max_block_mgr_memory=25m;
select
  l_comment,
  count(*)
from
  lineitem
group by
  l_comment
order by
  count(*) desc
limit 5
---- RESULTS
' furiously',943
' carefully',893
' carefully ',875
'carefully ',854
' furiously ',845
---- TYPES
STRING, BIGINT
---- RUNTIME_PROFILE
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Aggregation with a string grouping column (l_returnflag) and a string aggregate
# (min(l_shipmode)); the average tax is rounded to two decimal places.
# Spilling was only reliable with num_nodes=1.
# TODO: revisit with new buffer pool.
set num_nodes=1;
set max_block_mgr_memory=25m;
select
  l_returnflag,
  l_orderkey,
  round(avg(l_tax),2),
  min(l_shipmode)
from
  lineitem
group by
  l_returnflag,
  l_orderkey
order by
  l_returnflag,
  l_orderkey
limit 3;
---- RESULTS
'A',3,0.05,'RAIL'
'A',5,0.03,'AIR'
'A',6,0.03,'TRUCK'
---- TYPES
STRING, BIGINT, DECIMAL, STRING
---- RUNTIME_PROFILE
# Verify that spilling happened in the agg.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Exercises an aggregate whose intermediate state is a string: avg() keeps a string
# intermediate value.
set max_block_mgr_memory=25m;
select
  l_orderkey,
  avg(l_orderkey)
from
  lineitem
group by
  l_orderkey
order by
  l_orderkey
limit 5
---- RESULTS
1,1
2,2
3,3
4,4
5,5
---- TYPES
BIGINT, DOUBLE
---- RUNTIME_PROFILE
# Verify that passthrough and spilling happened in the pre and merge agg.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Three-way self-join of lineitem (aliases l1, l2, l3) reduced to a single count.
# l2 is joined to l1 on l_orderkey; l3 is joined to l1 on l_orderkey, l_comment and
# l_shipdate. l1 and l2 are additionally filtered on l_tax.
# Runs with a 100m block manager limit; the RUNTIME_PROFILE section of this test
# expects at least one join to spill.
# NOTE(review): num_nodes=0 presumably resets the num_nodes=1 used by earlier tests
# so the query runs distributed - confirm against the query option documentation.
set num_nodes=0;
set max_block_mgr_memory=100m;
select count(l1.l_tax)
from
lineitem l1,
lineitem l2,
lineitem l3
where
l1.l_tax < 0.01 and
l2.l_tax < 0.04 and
l1.l_orderkey = l2.l_orderkey and
l1.l_orderkey = l3.l_orderkey and
l1.l_comment = l3.l_comment and
l1.l_shipdate = l3.l_shipdate
---- RESULTS
1846743
---- TYPES
BIGINT
---- RUNTIME_PROFILE
# Verify that at least one of the joins was spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Analytic count(*) over () across all of lineitem, collapsed to a single row by the
# outer max() aggregates. Runs with a 40m block manager limit.
set num_nodes=0;
set max_block_mgr_memory=40m;
select
  max(t1.total_count),
  max(t1.l_shipinstruct),
  max(t1.l_comment)
from (
  select
    l_shipinstruct,
    l_comment,
    count(*) over () total_count
  from
    lineitem
) t1
---- RESULTS
6001215,'TAKE BACK RETURN','zzle? slyly final platelets sleep quickly. '
---- TYPES
BIGINT, STRING, STRING
---- RUNTIME_PROFILE
# Indirectly verify that the analytic spilled: if it spills a block, it must repin it.
row_regex: .*PinTime: [1-9][0-9]*.*
====
---- QUERY
# Run this query with very low memory. Since the tables are small, the PA/PHJ should be
# using buffers much smaller than the io buffer.
# NOTE(review): PA/PHJ presumably stand for the partitioned aggregation and
# partitioned hash join nodes - confirm.
# The query joins functional.alltypessmall to itself three times on id and counts
# rows per a.int_col under a 10m block manager limit.
set max_block_mgr_memory=10m;
select a.int_col, count(*)
from functional.alltypessmall a, functional.alltypessmall b, functional.alltypessmall c
where a.id = b.id and b.id = c.id group by a.int_col
---- RESULTS
0,12
1,12
2,12
3,12
4,12
5,8
6,8
7,8
8,8
9,8
---- TYPES
INT, BIGINT
---- RUNTIME_PROFILE
# This query is not meant to spill.
row_regex: .*SpilledPartitions: 0 .*
====
---- QUERY: TPCH-Q21
# Adding TPCH-Q21 in the spilling test to check for IMPALA-1471 (spilling left anti
# and left outer joins were returning wrong results).
# Q21 - Suppliers Who Kept Orders Waiting Query
# For suppliers in nation 'SAUDI ARABIA', counts line items on orders with status 'F'
# that were received after their commit date, where the order also has a line item
# from a different supplier (exists) and no line item from a different supplier that
# was itself received late (not exists).
# Runs with a 65m block manager limit; the RUNTIME_PROFILE section of this test
# expects at least one join to spill.
set num_nodes=0;
set max_block_mgr_memory=65m;
select
s_name,
count(*) as numwait
from
supplier,
lineitem l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.l_receiptdate > l1.l_commitdate
and exists (
select
*
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select
*
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
and l3.l_receiptdate > l3.l_commitdate
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit 100
---- RESULTS
'Supplier#000002829',20
'Supplier#000005808',18
'Supplier#000000262',17
'Supplier#000000496',17
'Supplier#000002160',17
'Supplier#000002301',17
'Supplier#000002540',17
'Supplier#000003063',17
'Supplier#000005178',17
'Supplier#000008331',17
'Supplier#000002005',16
'Supplier#000002095',16
'Supplier#000005799',16
'Supplier#000005842',16
'Supplier#000006450',16
'Supplier#000006939',16
'Supplier#000009200',16
'Supplier#000009727',16
'Supplier#000000486',15
'Supplier#000000565',15
'Supplier#000001046',15
'Supplier#000001047',15
'Supplier#000001161',15
'Supplier#000001336',15
'Supplier#000001435',15
'Supplier#000003075',15
'Supplier#000003335',15
'Supplier#000005649',15
'Supplier#000006027',15
'Supplier#000006795',15
'Supplier#000006800',15
'Supplier#000006824',15
'Supplier#000007131',15
'Supplier#000007382',15
'Supplier#000008913',15
'Supplier#000009787',15
'Supplier#000000633',14
'Supplier#000001960',14
'Supplier#000002323',14
'Supplier#000002490',14
'Supplier#000002993',14
'Supplier#000003101',14
'Supplier#000004489',14
'Supplier#000005435',14
'Supplier#000005583',14
'Supplier#000005774',14
'Supplier#000007579',14
'Supplier#000008180',14
'Supplier#000008695',14
'Supplier#000009224',14
'Supplier#000000357',13
'Supplier#000000436',13
'Supplier#000000610',13
'Supplier#000000788',13
'Supplier#000000889',13
'Supplier#000001062',13
'Supplier#000001498',13
'Supplier#000002056',13
'Supplier#000002312',13
'Supplier#000002344',13
'Supplier#000002596',13
'Supplier#000002615',13
'Supplier#000002978',13
'Supplier#000003048',13
'Supplier#000003234',13
'Supplier#000003727',13
'Supplier#000003806',13
'Supplier#000004472',13
'Supplier#000005236',13
'Supplier#000005906',13
'Supplier#000006241',13
'Supplier#000006326',13
'Supplier#000006384',13
'Supplier#000006394',13
'Supplier#000006624',13
'Supplier#000006629',13
'Supplier#000006682',13
'Supplier#000006737',13
'Supplier#000006825',13
'Supplier#000007021',13
'Supplier#000007417',13
'Supplier#000007497',13
'Supplier#000007602',13
'Supplier#000008134',13
'Supplier#000008234',13
'Supplier#000009435',13
'Supplier#000009436',13
'Supplier#000009564',13
'Supplier#000009896',13
'Supplier#000000379',12
'Supplier#000000673',12
'Supplier#000000762',12
'Supplier#000000811',12
'Supplier#000000821',12
'Supplier#000001337',12
'Supplier#000001916',12
'Supplier#000001925',12
'Supplier#000002039',12
'Supplier#000002357',12
'Supplier#000002483',12
---- TYPES
STRING, BIGINT
---- RUNTIME_PROFILE
# Verify that at least one of the joins was spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Aggregation spill test that includes group_concat(distinct ...) alongside count(*).
# Runs on a single node with a 100m block manager limit.
set num_nodes=1;
set max_block_mgr_memory=100m;
select
  l_orderkey,
  count(*),
  group_concat(distinct l_linestatus, '|')
from
  lineitem
group by
  l_orderkey
order by
  l_orderkey
limit 10
---- RESULTS
1,6,'O'
2,1,'O'
3,6,'F'
4,1,'O'
5,3,'F'
6,1,'F'
7,7,'O'
32,6,'O'
33,4,'F'
34,3,'O'
---- TYPES
BIGINT, BIGINT, STRING
---- RUNTIME_PROFILE
# Verify that at least one of the aggs spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Regression test for IMPALA-2612. Building the hash tables in the partitioned
# aggregation nodes for this query invokes CastToChar, which makes "local" memory
# allocations. Without the IMPALA-2612 fix the peak memory consumption is higher.
set mem_limit=800m;
set num_nodes=1;
set num_scanner_threads=1;
select
  count(distinct concat(
    cast(l_comment as char(120)),
    cast(l_comment as char(120)),
    cast(l_comment as char(120)),
    cast(l_comment as char(120)),
    cast(l_comment as char(120)),
    cast(l_comment as char(120))))
from
  lineitem
---- RESULTS
4502054
---- TYPES
BIGINT
---- RUNTIME_PROFILE
# Verify that the agg spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Sort on an inlined char column (char(50)) that is materialized by exprs.
# A low memory limit is set to force spilling.
# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption.
# NOTE(review): disable_outermost_topn=1 presumably keeps the order by/limit from
# being planned as a top-n so that a full sort runs - confirm.
set num_nodes=0;
set max_block_mgr_memory=4m;
set mem_limit=200m;
set disable_outermost_topn=1;
select
  cast(l_comment as char(50))
from
  lineitem
order by
  1
limit 20;
---- RESULTS
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias about the en '
' Tiresias about the slyly ironic dinos ca '
' Tiresias about the slyly unus '
' Tiresias above '
' Tiresias above the fox '
' Tiresias above the furiously final th '
' Tiresias above the slyly expr '
' Tiresias above the stealthily p '
---- TYPES
CHAR
---- RUNTIME_PROFILE
# Verify that the sort actually spilled
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
====
---- QUERY
# Sort whose input is an inlined char column (char(50)) materialized before the sort:
# the cast happens in the subquery and the outer query orders by the result.
set num_nodes=0;
set mem_limit=200m;
set max_block_mgr_memory=4m;
set disable_outermost_topn=1;
select
  char_col
from (
  select
    cast(l_comment as char(50)) char_col
  from
    lineitem
) subquery
order by
  1
limit 20;
---- RESULTS
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias about the en '
' Tiresias about the slyly ironic dinos ca '
' Tiresias about the slyly unus '
' Tiresias above '
' Tiresias above the fox '
' Tiresias above the furiously final th '
' Tiresias above the slyly expr '
' Tiresias above the stealthily p '
---- TYPES
CHAR
---- RUNTIME_PROFILE
# Verify that the sort actually spilled
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
====
---- QUERY
# Sort whose input is a non-inlined char column (char(200)) materialized before the
# sort: the cast happens in the subquery and the outer query orders by the result.
# A low memory limit is set to force spilling.
set num_nodes=0;
set mem_limit=200m;
set max_block_mgr_memory=4m;
set disable_outermost_topn=1;
select
  char_col
from (
  select
    cast(l_comment as char(200)) char_col
  from
    lineitem
) subquery
order by
  1
limit 20;
---- RESULTS
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias about the en '
' Tiresias about the slyly ironic dinos ca '
' Tiresias about the slyly unus '
' Tiresias above '
' Tiresias above the fox '
' Tiresias above the furiously final th '
' Tiresias above the slyly expr '
' Tiresias above the stealthily p '
---- TYPES
CHAR
---- RUNTIME_PROFILE
# Verify that the sort actually spilled
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
====
---- QUERY
# Sort on a varchar column (varchar(50)) that is materialized by exprs.
# A low memory limit is set to force spilling.
# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption.
set num_nodes=0;
set max_block_mgr_memory=4m;
set mem_limit=200m;
set disable_outermost_topn=1;
select
  cast(l_comment as varchar(50))
from
  lineitem
order by
  1
limit 20;
---- RESULTS
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias about the en'
' Tiresias about the slyly ironic dinos ca'
' Tiresias about the slyly unus'
' Tiresias above'
' Tiresias above the fox'
' Tiresias above the furiously final th'
' Tiresias above the slyly expr'
' Tiresias above the stealthily p'
---- TYPES
STRING
---- RUNTIME_PROFILE
# Verify that the sort actually spilled
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
====
---- QUERY
# Sort whose input is a varchar column (varchar(50)) materialized before the sort:
# the cast happens in the subquery and the outer query orders by the result.
# A low memory limit is set to force spilling.
set num_nodes=0;
set mem_limit=200m;
set max_block_mgr_memory=4m;
set disable_outermost_topn=1;
select
  char_col
from (
  select
    cast(l_comment as varchar(50)) char_col
  from
    lineitem
) subquery
order by
  1
limit 20;
---- RESULTS
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias '
' Tiresias about the en'
' Tiresias about the slyly ironic dinos ca'
' Tiresias about the slyly unus'
' Tiresias above'
' Tiresias above the fox'
' Tiresias above the furiously final th'
' Tiresias above the slyly expr'
' Tiresias above the stealthily p'
---- TYPES
STRING
---- RUNTIME_PROFILE
# Verify that the sort actually spilled
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
====