Files
impala/testdata/workloads/functional-query/queries/QueryTest/spilling.test
Michael Ho ba0bd1d0da IMPALA-2612: Free local allocations once for every row batch when building hash tables.
When building hash tables for the build side in partitioned
hash join or aggreagtion, we will evaluate the build or probe
side expressions to compute the hash values for each TupleRow.
Evaluation of certain expressions (e.g. CastToChar) requires
"local" memory allocation. "Local" memory allocation is supposed
to be freed after processing each row batch.

However, the calls to free local allocations are missing in
PartitionedHashJoinNode::BuildHashTableInternal() and
PartitionedAggregationNode::ProcessStream(). This causes all
"local" memory allocation to accumulate potentially for the
entire duration of the query or until GetNext() is called.
This may lead to unnecessary memory allocation failure as
memory limit is exceeded.

This patch calls ExecNode::FreeLocalAllocations() at least once
per row-batch when building hash tables. It also adds the missing
checks for the query status in the loop building hash tables.
Please note that QueryMaintenance() isn't called due to its
overhead in memory limit checks.

Change-Id: Idbeab043a45b0aaf6b6a8c560882bd1474a1216d
Reviewed-on: http://gerrit.cloudera.org:8080/1448
Reviewed-by: Michael Ho <kwho@cloudera.com>
Tested-by: Internal Jenkins
2015-11-26 03:21:46 +00:00

356 lines
7.5 KiB
Plaintext

====
---- QUERY
set num_nodes=1;
set max_block_mgr_memory=265m;
select l_orderkey, count(*)
from lineitem
group by 1
order by 1 limit 10
---- RESULTS
1,6
2,1
3,6
4,1
5,3
6,1
7,7
32,6
33,4
34,3
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Test query with string grouping column and string agg columns
set num_nodes=1;
set max_block_mgr_memory=275m;
select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode)
from lineitem
group by 1,2
order by 1,2 limit 3
---- RESULTS
'A',3,0.05,'RAIL'
'A',5,0.03,'AIR'
'A',6,0.03,'TRUCK'
---- TYPES
STRING, BIGINT, DECIMAL, STRING
====
---- QUERY
# TODO: the distributed agg tests are highly likely of becoming flaking. The memory is
# split between the local and merge agg in an uncontrolled way, which can lead to
# pathological cases causing the queries to fail with OOM. These cases are expected
# to be unlikely with reasonable limits but we are explicitly trying to pick very low
# limits (IMPALA-1305).
set max_block_mgr_memory=275m;
select l_orderkey, count(*)
from lineitem
group by 1
order by 1 limit 10;
---- RESULTS
1,6
2,1
3,6
4,1
5,3
6,1
7,7
32,6
33,4
34,3
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Test query with string grouping column
set num_nodes=0;
set max_block_mgr_memory=275m;
select l_comment, count(*)
from lineitem
group by 1
order by count(*) desc limit 5
---- RESULTS
' furiously',943
' carefully',893
' carefully ',875
'carefully ',854
' furiously ',845
---- TYPES
STRING, BIGINT
====
---- QUERY
# Disabled due to IMPALA-1305.
# Test query with string grouping column and string agg columns
#set num_nodes=0;
#set max_block_mgr_memory=80m;
#select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode)
#from lineitem
#group by 1,2
#order by 1,2 limit 3;
#---- RESULTS
#'A',3,0.05,'RAIL'
#'A',5,0.03,'AIR'
#'A',6,0.03,'TRUCK'
#---- TYPES
#STRING, BIGINT, DOUBLE, STRING
#====
#
# Test query with string intermediate state.
set num_nodes=0;
set max_block_mgr_memory=275m;
select l_orderkey, avg(l_orderkey)
from lineitem
group by 1
order by 1 limit 5
---- RESULTS
1,1
2,2
3,3
4,4
5,5
---- TYPES
BIGINT, DOUBLE
====
---- QUERY
set num_nodes=0;
set max_block_mgr_memory=100m;
select count(l1.l_tax)
from
lineitem l1,
lineitem l2,
lineitem l3
where
l1.l_tax < 0.01 and
l2.l_tax < 0.04 and
l1.l_orderkey = l2.l_orderkey and
l1.l_orderkey = l3.l_orderkey and
l1.l_comment = l3.l_comment and
l1.l_shipdate = l3.l_shipdate
---- RESULTS
1846743
---- TYPES
BIGINT
====
---- QUERY
set num_nodes=0;
set max_block_mgr_memory=40m;
select max(t1.total_count), max(t1.l_shipinstruct), max(t1.l_comment) from
(select l_shipinstruct, l_comment, count(*) over () total_count from lineitem) t1
---- RESULTS
6001215,'TAKE BACK RETURN','zzle? slyly final platelets sleep quickly. '
---- TYPES
BIGINT, STRING, STRING
---- QUERY
# Run this query with very low memory. Since the tables are small, the PA/PHJ should be
# using buffers much smaller than the io buffer.
set max_block_mgr_memory=10m;
select a.int_col, count(*)
from functional.alltypessmall a, functional.alltypessmall b, functional.alltypessmall c
where a.id = b.id and b.id = c.id group by a.int_col
---- RESULTS
0,12
1,12
2,12
3,12
4,12
5,8
6,8
7,8
8,8
9,8
---- TYPES
INT, BIGINT
====
---- QUERY: TPCH-Q21
# Adding TPCH-Q21 in the spilling test to check for IMPALA-1471 (spilling left anti
# and left outer joins were returning wrong results).
# Q21 - Suppliers Who Kept Orders Waiting Query
set num_nodes=0;
set max_block_mgr_memory=100m;
select
s_name,
count(*) as numwait
from
supplier,
lineitem l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.l_receiptdate > l1.l_commitdate
and exists (
select
*
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select
*
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
and l3.l_receiptdate > l3.l_commitdate
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit 100
---- RESULTS
'Supplier#000002829',20
'Supplier#000005808',18
'Supplier#000000262',17
'Supplier#000000496',17
'Supplier#000002160',17
'Supplier#000002301',17
'Supplier#000002540',17
'Supplier#000003063',17
'Supplier#000005178',17
'Supplier#000008331',17
'Supplier#000002005',16
'Supplier#000002095',16
'Supplier#000005799',16
'Supplier#000005842',16
'Supplier#000006450',16
'Supplier#000006939',16
'Supplier#000009200',16
'Supplier#000009727',16
'Supplier#000000486',15
'Supplier#000000565',15
'Supplier#000001046',15
'Supplier#000001047',15
'Supplier#000001161',15
'Supplier#000001336',15
'Supplier#000001435',15
'Supplier#000003075',15
'Supplier#000003335',15
'Supplier#000005649',15
'Supplier#000006027',15
'Supplier#000006795',15
'Supplier#000006800',15
'Supplier#000006824',15
'Supplier#000007131',15
'Supplier#000007382',15
'Supplier#000008913',15
'Supplier#000009787',15
'Supplier#000000633',14
'Supplier#000001960',14
'Supplier#000002323',14
'Supplier#000002490',14
'Supplier#000002993',14
'Supplier#000003101',14
'Supplier#000004489',14
'Supplier#000005435',14
'Supplier#000005583',14
'Supplier#000005774',14
'Supplier#000007579',14
'Supplier#000008180',14
'Supplier#000008695',14
'Supplier#000009224',14
'Supplier#000000357',13
'Supplier#000000436',13
'Supplier#000000610',13
'Supplier#000000788',13
'Supplier#000000889',13
'Supplier#000001062',13
'Supplier#000001498',13
'Supplier#000002056',13
'Supplier#000002312',13
'Supplier#000002344',13
'Supplier#000002596',13
'Supplier#000002615',13
'Supplier#000002978',13
'Supplier#000003048',13
'Supplier#000003234',13
'Supplier#000003727',13
'Supplier#000003806',13
'Supplier#000004472',13
'Supplier#000005236',13
'Supplier#000005906',13
'Supplier#000006241',13
'Supplier#000006326',13
'Supplier#000006384',13
'Supplier#000006394',13
'Supplier#000006624',13
'Supplier#000006629',13
'Supplier#000006682',13
'Supplier#000006737',13
'Supplier#000006825',13
'Supplier#000007021',13
'Supplier#000007417',13
'Supplier#000007497',13
'Supplier#000007602',13
'Supplier#000008134',13
'Supplier#000008234',13
'Supplier#000009435',13
'Supplier#000009436',13
'Supplier#000009564',13
'Supplier#000009896',13
'Supplier#000000379',12
'Supplier#000000673',12
'Supplier#000000762',12
'Supplier#000000811',12
'Supplier#000000821',12
'Supplier#000001337',12
'Supplier#000001916',12
'Supplier#000001925',12
'Supplier#000002039',12
'Supplier#000002357',12
'Supplier#000002483',12
---- TYPES
string, bigint
====
---- QUERY
# Test aggregation spill with group_concat distinct
# TODO: get this to spill.
set num_nodes=1;
set max_block_mgr_memory=265m;
select l_orderkey, count(*), group_concat(distinct l_linestatus, '|')
from lineitem
group by 1
order by 1 limit 10
---- RESULTS
1,6,'O'
2,1,'O'
3,6,'F'
4,1,'O'
5,3,'F'
6,1,'F'
7,7,'O'
32,6,'O'
33,4,'F'
34,3,'O'
---- TYPES
BIGINT, BIGINT, STRING
====
---- QUERY
# Regression test for IMPALA-2612. The following query will cause CastToChar
# to be invoked when building the hash tables in partitioned aggregation
# nodes. CastToChar will do "local" memory allocation. Without the fix of
# IMPALA-2612, the peak memory consumption will be higher.
set mem_limit=800m;
set num_nodes=1;
set num_scanner_threads=1;
select count(distinct concat(cast(l_comment as char(120)), cast(l_comment as char(120)),
cast(l_comment as char(120)), cast(l_comment as char(120)),
cast(l_comment as char(120)), cast(l_comment as char(120))))
from lineitem
---- RESULTS
4502054
---- TYPES
BIGINT
====
---- QUERY
set mem_limit=0;
set num_nodes=0;
====