Files
impala/testdata/workloads/functional-query/queries/QueryTest/spilling.test
Ippokratis Pandis d58aedff42 IMPALA-1820: Start with small pages for hash tables during repartitioning
The change of the PARTITION_FANOUT from 32 to 16 exposed a pathological case due to
the lack of coordination across concurrently executing spilling nodes of the same query.
In particular, when we repartition a partition we try to initialize hash tables for the
new partitions. But each hash table needs a block (for the nodes). In case there were not
any IO-sized blocks available, because they had been consumed by other nodes, we would get
into a loop trying to repartition those smaller partitions that couldn't initialize their
hash table. Additional repartitions that, among others, would need additional blocks for
the new streams. These partitions would end up being very small, still we would fail the
query when we were reaching the MAX_PARTITION_DEPTH limit, which was fixed to 4.

This patch fixes the problem by initializing the hash tables during repartitions with
small pages. That is, the hash tables always first use a 64KB and a 512KB block for their
nodes before switching to IO-sized blocks. This helps the partitioning algorithm to
finish when we end up with partitions that can fit in those small pages. The performance
may not be optimal, still the memory consumption is lower and the algorithm finishes. For
example, without this patch and with PARTITION_FANOUT == 16 in order to run TPC-H Q18 and
Q20 we needed 3.4GB and 3.1GB respectively. With this patch TPC-H Q18 needs ~1GB and Q20
975MB.

This patch also removes the restriction of stopping repartitioning when we are reaching
4 levels of repartitioning. Instead, whenever we repartition we compare the size of
the input partition to the size of the largest new partition. If there is no reduction
on the size we stop the algorithm. Otherwise, we keep on repartitioning. That should
help in cases of skew (e.g. due to bad hashing). There is a new MAX_PARTITION_DEPTH limit
of 16. It is very unlikely we will ever hit this limit.

Change-Id: Ib33fece10585448bc2d07bb39d0535d78b168ccc
Reviewed-on: http://gerrit.cloudera.org:8080/119
Reviewed-by: Ippokratis Pandis <ipandis@cloudera.com>
Tested-by: Internal Jenkins
2015-02-28 00:42:04 +00:00

313 lines
6.4 KiB
Plaintext

====
---- QUERY
set num_nodes=1;
set max_block_mgr_memory=265m;
select l_orderkey, count(*)
from lineitem
group by 1
order by 1 limit 10
---- RESULTS
1,6
2,1
3,6
4,1
5,3
6,1
7,7
32,6
33,4
34,3
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Test query with string grouping column and string agg columns
set num_nodes=1;
set max_block_mgr_memory=275m;
select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode)
from lineitem
group by 1,2
order by 1,2 limit 3
---- RESULTS
'A',3,0.05,'RAIL'
'A',5,0.03,'AIR'
'A',6,0.03,'TRUCK'
---- TYPES
STRING, BIGINT, DECIMAL, STRING
====
---- QUERY
# TODO: the distributed agg tests are highly likely of becoming flaking. The memory is
# split between the local and merge agg in an uncontrolled way, which can lead to
# pathological cases causing the queries to fail with OOM. These cases are expected
# to be unlikely with reasonable limits but we are explicitly trying to pick very low
# limits (IMPALA-1305).
set max_block_mgr_memory=275m;
select l_orderkey, count(*)
from lineitem
group by 1
order by 1 limit 10;
---- RESULTS
1,6
2,1
3,6
4,1
5,3
6,1
7,7
32,6
33,4
34,3
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Test query with string grouping column
set num_nodes=0;
set max_block_mgr_memory=275m;
select l_comment, count(*)
from lineitem
group by 1
order by count(*) desc limit 5
---- RESULTS
' furiously',943
' carefully',893
' carefully ',875
'carefully ',854
' furiously ',845
---- TYPES
STRING, BIGINT
====
---- QUERY
# Disabled due to IMPALA-1305.
# Test query with string grouping column and string agg columns
#set num_nodes=0;
#set max_block_mgr_memory=80m;
#select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode)
#from lineitem
#group by 1,2
#order by 1,2 limit 3;
#---- RESULTS
#'A',3,0.05,'RAIL'
#'A',5,0.03,'AIR'
#'A',6,0.03,'TRUCK'
#---- TYPES
#STRING, BIGINT, DOUBLE, STRING
#====
#
# Test query with string intermediate state.
set num_nodes=0;
set max_block_mgr_memory=275m;
select l_orderkey, avg(l_orderkey)
from lineitem
group by 1
order by 1 limit 5
---- RESULTS
1,1
2,2
3,3
4,4
5,5
---- TYPES
BIGINT, DOUBLE
====
---- QUERY
set num_nodes=0;
set max_block_mgr_memory=100m;
select count(l1.l_tax)
from
lineitem l1,
lineitem l2,
lineitem l3
where
l1.l_tax < 0.01 and
l2.l_tax < 0.04 and
l1.l_orderkey = l2.l_orderkey and
l1.l_orderkey = l3.l_orderkey and
l1.l_comment = l3.l_comment and
l1.l_shipdate = l3.l_shipdate
---- RESULTS
1846743
---- TYPES
BIGINT
====
---- QUERY
set num_nodes=0;
set max_block_mgr_memory=45m;
select max(t1.total_count), max(t1.l_shipinstruct), max(t1.l_comment) from
(select l_shipinstruct, l_comment, count(*) over () total_count from lineitem) t1
---- RESULTS
6001215,'TAKE BACK RETURN','zzle? slyly final platelets sleep quickly. '
---- TYPES
BIGINT, STRING, STRING
---- QUERY
# Run this query with very low memory. Since the tables are small, the PA/PHJ should be
# using buffers much smaller than the io buffer.
set max_block_mgr_memory=10m;
select a.int_col, count(*)
from functional.alltypessmall a, functional.alltypessmall b, functional.alltypessmall c
where a.id = b.id and b.id = c.id group by a.int_col;
---- RESULTS
0,12
1,12
2,12
3,12
4,12
5,8
6,8
7,8
8,8
9,8
---- TYPES
INT, BIGINT
====
---- QUERY: TPCH-Q21
# Adding TPCH-Q21 in the spilling test to check for IMPALA-1471 (spilling left anti
# and left outer joins were returning wrong results).
# Q21 - Suppliers Who Kept Orders Waiting Query
set num_nodes=0;
set max_block_mgr_memory=100m;
select
s_name,
count(*) as numwait
from
supplier,
lineitem l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.l_receiptdate > l1.l_commitdate
and exists (
select
*
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select
*
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
and l3.l_receiptdate > l3.l_commitdate
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit 100
---- RESULTS
'Supplier#000002829',20
'Supplier#000005808',18
'Supplier#000000262',17
'Supplier#000000496',17
'Supplier#000002160',17
'Supplier#000002301',17
'Supplier#000002540',17
'Supplier#000003063',17
'Supplier#000005178',17
'Supplier#000008331',17
'Supplier#000002005',16
'Supplier#000002095',16
'Supplier#000005799',16
'Supplier#000005842',16
'Supplier#000006450',16
'Supplier#000006939',16
'Supplier#000009200',16
'Supplier#000009727',16
'Supplier#000000486',15
'Supplier#000000565',15
'Supplier#000001046',15
'Supplier#000001047',15
'Supplier#000001161',15
'Supplier#000001336',15
'Supplier#000001435',15
'Supplier#000003075',15
'Supplier#000003335',15
'Supplier#000005649',15
'Supplier#000006027',15
'Supplier#000006795',15
'Supplier#000006800',15
'Supplier#000006824',15
'Supplier#000007131',15
'Supplier#000007382',15
'Supplier#000008913',15
'Supplier#000009787',15
'Supplier#000000633',14
'Supplier#000001960',14
'Supplier#000002323',14
'Supplier#000002490',14
'Supplier#000002993',14
'Supplier#000003101',14
'Supplier#000004489',14
'Supplier#000005435',14
'Supplier#000005583',14
'Supplier#000005774',14
'Supplier#000007579',14
'Supplier#000008180',14
'Supplier#000008695',14
'Supplier#000009224',14
'Supplier#000000357',13
'Supplier#000000436',13
'Supplier#000000610',13
'Supplier#000000788',13
'Supplier#000000889',13
'Supplier#000001062',13
'Supplier#000001498',13
'Supplier#000002056',13
'Supplier#000002312',13
'Supplier#000002344',13
'Supplier#000002596',13
'Supplier#000002615',13
'Supplier#000002978',13
'Supplier#000003048',13
'Supplier#000003234',13
'Supplier#000003727',13
'Supplier#000003806',13
'Supplier#000004472',13
'Supplier#000005236',13
'Supplier#000005906',13
'Supplier#000006241',13
'Supplier#000006326',13
'Supplier#000006384',13
'Supplier#000006394',13
'Supplier#000006624',13
'Supplier#000006629',13
'Supplier#000006682',13
'Supplier#000006737',13
'Supplier#000006825',13
'Supplier#000007021',13
'Supplier#000007417',13
'Supplier#000007497',13
'Supplier#000007602',13
'Supplier#000008134',13
'Supplier#000008234',13
'Supplier#000009435',13
'Supplier#000009436',13
'Supplier#000009564',13
'Supplier#000009896',13
'Supplier#000000379',12
'Supplier#000000673',12
'Supplier#000000762',12
'Supplier#000000811',12
'Supplier#000000821',12
'Supplier#000001337',12
'Supplier#000001916',12
'Supplier#000001925',12
'Supplier#000002039',12
'Supplier#000002357',12
'Supplier#000002483',12
---- TYPES
string, bigint
====