mirror of
https://github.com/apache/impala.git
synced 2026-01-07 09:02:19 -05:00
This fixes a regression introduced with: IMPALA-1621,2241,2271,2330,2352: Lazy switch to IO buffers to reduce min mem needed for PAGG/PHJ Prior to that change, as soon as any partition's stream overflowed its small buffers, all partitions' streams would be switched immediately to IO-buffers, which would be satisfied by the initial buffer "reservation". After that change, individual streams are switched to IO-buffers on demand as they overflow their small buffers. However, that change also made it so that Partition::Spill() would eagerly switch that partition's streams to IO-buffers, and fail the query if the buffer is not available. The buffer may not be available because the reserved buffers may be in use by other partition's streams. We don't need to fail the query if the switch to IO-buffers in Partition::Spill() fails. Instead, we should just let the streams switch on demand as they fill up the small buffers. When that happens, if the IO buffer is not available, then we already have a mechanism to pick partitions to spill until we can get the IO-buffer (in the worst case it means working our way back down to the initial reservation). See AppendRowStreamFull() and BuildHashTables(). The symptom of this regression was that some queries would fail at a lower memory limit than before. Also revert the max_block_mgr_memory values back to their originals. Additional testing: loop custom_cluster/spilling.py. We should also remeasure minimum memory required by queries after this change. Change-Id: I11add15540606d42cd64f2af99f4e96140ae8bb5 Reviewed-on: http://gerrit.cloudera.org:8080/1228 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Internal Jenkins
336 lines
6.8 KiB
Plaintext
336 lines
6.8 KiB
Plaintext
====
|
|
---- QUERY
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=265m;
|
|
select l_orderkey, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10
|
|
---- RESULTS
|
|
1,6
|
|
2,1
|
|
3,6
|
|
4,1
|
|
5,3
|
|
6,1
|
|
7,7
|
|
32,6
|
|
33,4
|
|
34,3
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test query with string grouping column and string agg columns
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=275m;
|
|
select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode)
|
|
from lineitem
|
|
group by 1,2
|
|
order by 1,2 limit 3
|
|
---- RESULTS
|
|
'A',3,0.05,'RAIL'
|
|
'A',5,0.03,'AIR'
|
|
'A',6,0.03,'TRUCK'
|
|
---- TYPES
|
|
STRING, BIGINT, DECIMAL, STRING
|
|
====
|
|
---- QUERY
|
|
# TODO: the distributed agg tests are highly likely of becoming flaking. The memory is
|
|
# split between the local and merge agg in an uncontrolled way, which can lead to
|
|
# pathological cases causing the queries to fail with OOM. These cases are expected
|
|
# to be unlikely with reasonable limits but we are explicitly trying to pick very low
|
|
# limits (IMPALA-1305).
|
|
set max_block_mgr_memory=275m;
|
|
select l_orderkey, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10;
|
|
---- RESULTS
|
|
1,6
|
|
2,1
|
|
3,6
|
|
4,1
|
|
5,3
|
|
6,1
|
|
7,7
|
|
32,6
|
|
33,4
|
|
34,3
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test query with string grouping column
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=275m;
|
|
select l_comment, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by count(*) desc limit 5
|
|
---- RESULTS
|
|
' furiously',943
|
|
' carefully',893
|
|
' carefully ',875
|
|
'carefully ',854
|
|
' furiously ',845
|
|
---- TYPES
|
|
STRING, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Disabled due to IMPALA-1305.
|
|
# Test query with string grouping column and string agg columns
|
|
#set num_nodes=0;
|
|
#set max_block_mgr_memory=80m;
|
|
#select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode)
|
|
#from lineitem
|
|
#group by 1,2
|
|
#order by 1,2 limit 3;
|
|
#---- RESULTS
|
|
#'A',3,0.05,'RAIL'
|
|
#'A',5,0.03,'AIR'
|
|
#'A',6,0.03,'TRUCK'
|
|
#---- TYPES
|
|
#STRING, BIGINT, DOUBLE, STRING
|
|
#====
|
|
#
|
|
# Test query with string intermediate state.
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=275m;
|
|
select l_orderkey, avg(l_orderkey)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 5
|
|
---- RESULTS
|
|
1,1
|
|
2,2
|
|
3,3
|
|
4,4
|
|
5,5
|
|
---- TYPES
|
|
BIGINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=100m;
|
|
select count(l1.l_tax)
|
|
from
|
|
lineitem l1,
|
|
lineitem l2,
|
|
lineitem l3
|
|
where
|
|
l1.l_tax < 0.01 and
|
|
l2.l_tax < 0.04 and
|
|
l1.l_orderkey = l2.l_orderkey and
|
|
l1.l_orderkey = l3.l_orderkey and
|
|
l1.l_comment = l3.l_comment and
|
|
l1.l_shipdate = l3.l_shipdate
|
|
---- RESULTS
|
|
1846743
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=40m;
|
|
select max(t1.total_count), max(t1.l_shipinstruct), max(t1.l_comment) from
|
|
(select l_shipinstruct, l_comment, count(*) over () total_count from lineitem) t1
|
|
---- RESULTS
|
|
6001215,'TAKE BACK RETURN','zzle? slyly final platelets sleep quickly. '
|
|
---- TYPES
|
|
BIGINT, STRING, STRING
|
|
---- QUERY
|
|
# Run this query with very low memory. Since the tables are small, the PA/PHJ should be
|
|
# using buffers much smaller than the io buffer.
|
|
set max_block_mgr_memory=10m;
|
|
select a.int_col, count(*)
|
|
from functional.alltypessmall a, functional.alltypessmall b, functional.alltypessmall c
|
|
where a.id = b.id and b.id = c.id group by a.int_col
|
|
---- RESULTS
|
|
0,12
|
|
1,12
|
|
2,12
|
|
3,12
|
|
4,12
|
|
5,8
|
|
6,8
|
|
7,8
|
|
8,8
|
|
9,8
|
|
---- TYPES
|
|
INT, BIGINT
|
|
====
|
|
---- QUERY: TPCH-Q21
|
|
# Adding TPCH-Q21 in the spilling test to check for IMPALA-1471 (spilling left anti
|
|
# and left outer joins were returning wrong results).
|
|
# Q21 - Suppliers Who Kept Orders Waiting Query
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=100m;
|
|
select
|
|
s_name,
|
|
count(*) as numwait
|
|
from
|
|
supplier,
|
|
lineitem l1,
|
|
orders,
|
|
nation
|
|
where
|
|
s_suppkey = l1.l_suppkey
|
|
and o_orderkey = l1.l_orderkey
|
|
and o_orderstatus = 'F'
|
|
and l1.l_receiptdate > l1.l_commitdate
|
|
and exists (
|
|
select
|
|
*
|
|
from
|
|
lineitem l2
|
|
where
|
|
l2.l_orderkey = l1.l_orderkey
|
|
and l2.l_suppkey <> l1.l_suppkey
|
|
)
|
|
and not exists (
|
|
select
|
|
*
|
|
from
|
|
lineitem l3
|
|
where
|
|
l3.l_orderkey = l1.l_orderkey
|
|
and l3.l_suppkey <> l1.l_suppkey
|
|
and l3.l_receiptdate > l3.l_commitdate
|
|
)
|
|
and s_nationkey = n_nationkey
|
|
and n_name = 'SAUDI ARABIA'
|
|
group by
|
|
s_name
|
|
order by
|
|
numwait desc,
|
|
s_name
|
|
limit 100
|
|
---- RESULTS
|
|
'Supplier#000002829',20
|
|
'Supplier#000005808',18
|
|
'Supplier#000000262',17
|
|
'Supplier#000000496',17
|
|
'Supplier#000002160',17
|
|
'Supplier#000002301',17
|
|
'Supplier#000002540',17
|
|
'Supplier#000003063',17
|
|
'Supplier#000005178',17
|
|
'Supplier#000008331',17
|
|
'Supplier#000002005',16
|
|
'Supplier#000002095',16
|
|
'Supplier#000005799',16
|
|
'Supplier#000005842',16
|
|
'Supplier#000006450',16
|
|
'Supplier#000006939',16
|
|
'Supplier#000009200',16
|
|
'Supplier#000009727',16
|
|
'Supplier#000000486',15
|
|
'Supplier#000000565',15
|
|
'Supplier#000001046',15
|
|
'Supplier#000001047',15
|
|
'Supplier#000001161',15
|
|
'Supplier#000001336',15
|
|
'Supplier#000001435',15
|
|
'Supplier#000003075',15
|
|
'Supplier#000003335',15
|
|
'Supplier#000005649',15
|
|
'Supplier#000006027',15
|
|
'Supplier#000006795',15
|
|
'Supplier#000006800',15
|
|
'Supplier#000006824',15
|
|
'Supplier#000007131',15
|
|
'Supplier#000007382',15
|
|
'Supplier#000008913',15
|
|
'Supplier#000009787',15
|
|
'Supplier#000000633',14
|
|
'Supplier#000001960',14
|
|
'Supplier#000002323',14
|
|
'Supplier#000002490',14
|
|
'Supplier#000002993',14
|
|
'Supplier#000003101',14
|
|
'Supplier#000004489',14
|
|
'Supplier#000005435',14
|
|
'Supplier#000005583',14
|
|
'Supplier#000005774',14
|
|
'Supplier#000007579',14
|
|
'Supplier#000008180',14
|
|
'Supplier#000008695',14
|
|
'Supplier#000009224',14
|
|
'Supplier#000000357',13
|
|
'Supplier#000000436',13
|
|
'Supplier#000000610',13
|
|
'Supplier#000000788',13
|
|
'Supplier#000000889',13
|
|
'Supplier#000001062',13
|
|
'Supplier#000001498',13
|
|
'Supplier#000002056',13
|
|
'Supplier#000002312',13
|
|
'Supplier#000002344',13
|
|
'Supplier#000002596',13
|
|
'Supplier#000002615',13
|
|
'Supplier#000002978',13
|
|
'Supplier#000003048',13
|
|
'Supplier#000003234',13
|
|
'Supplier#000003727',13
|
|
'Supplier#000003806',13
|
|
'Supplier#000004472',13
|
|
'Supplier#000005236',13
|
|
'Supplier#000005906',13
|
|
'Supplier#000006241',13
|
|
'Supplier#000006326',13
|
|
'Supplier#000006384',13
|
|
'Supplier#000006394',13
|
|
'Supplier#000006624',13
|
|
'Supplier#000006629',13
|
|
'Supplier#000006682',13
|
|
'Supplier#000006737',13
|
|
'Supplier#000006825',13
|
|
'Supplier#000007021',13
|
|
'Supplier#000007417',13
|
|
'Supplier#000007497',13
|
|
'Supplier#000007602',13
|
|
'Supplier#000008134',13
|
|
'Supplier#000008234',13
|
|
'Supplier#000009435',13
|
|
'Supplier#000009436',13
|
|
'Supplier#000009564',13
|
|
'Supplier#000009896',13
|
|
'Supplier#000000379',12
|
|
'Supplier#000000673',12
|
|
'Supplier#000000762',12
|
|
'Supplier#000000811',12
|
|
'Supplier#000000821',12
|
|
'Supplier#000001337',12
|
|
'Supplier#000001916',12
|
|
'Supplier#000001925',12
|
|
'Supplier#000002039',12
|
|
'Supplier#000002357',12
|
|
'Supplier#000002483',12
|
|
---- TYPES
|
|
string, bigint
|
|
====
|
|
---- QUERY
|
|
# Test aggregation spill with group_concat distinct
|
|
# TODO: get this to spill.
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=265m;
|
|
select l_orderkey, count(*), group_concat(distinct l_linestatus, '|')
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10
|
|
---- RESULTS
|
|
1,6,'O'
|
|
2,1,'O'
|
|
3,6,'F'
|
|
4,1,'O'
|
|
5,3,'F'
|
|
6,1,'F'
|
|
7,7,'O'
|
|
32,6,'O'
|
|
33,4,'F'
|
|
34,3,'O'
|
|
---- TYPES
|
|
BIGINT, BIGINT, STRING
|
|
====
|