mirror of
https://github.com/apache/impala.git
synced 2026-01-29 03:00:27 -05:00
Always create global BufferPool at startup using 80% of memory and limit reservations to 80% of query memory (same as BufferedBlockMgr). The query's initial reservation is computed in the planner, claimed centrally (managed by the InitialReservations class) and distributed to query operators from there. min_spillable_buffer_size and default_spillable_buffer_size query options control the buffer size that the planner selects for spilling operators. Port ExecNodes to use BufferPool: * Each ExecNode has to claim its reservation during Open() * Port Sorter to use BufferPool. * Switch from BufferedTupleStream to BufferedTupleStreamV2 * Port HashTable to use BufferPool via a Suballocator. This also makes PAGG memory consumption more efficient (avoid wasting buffers) and improve the spilling algorithm: * Allow preaggs to execute with 0 reservation - if streams and hash tables cannot be allocated, it will pass through rows. * Halve the buffer requirement for spilling aggs - avoid allocating buffers for aggregated and unaggregated streams simultaneously. * Rebuild spilled partitions instead of repartitioning (IMPALA-2708) TODO in follow-up patches: * Rename BufferedTupleStreamV2 to BufferedTupleStream * Implement max_row_size query option. Testing: * Updated tests to reflect new memory requirements Change-Id: I7fc7fe1c04e9dfb1a0c749fb56a5e0f2bf9c6c3e Reviewed-on: http://gerrit.cloudera.org:8080/5801 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Impala Public Jenkins
46 lines
1.7 KiB
Plaintext
46 lines
1.7 KiB
Plaintext
====
|
|
---- QUERY
|
|
# IMPALA-4866: Hash join node does not apply limits correctly
|
|
# Test that a join query applies the limits correctly when output_batch gets
|
|
# populated in OutputNullAwareProbe. If the output_batch is full after invoking
|
|
# this function, the ReachedLimit check won't be correctly applied without this
|
|
# change
|
|
set batch_size=6;
|
|
select id, int_col, bigint_col from functional.alltypesagg a
|
|
where int_col not in (select int_col from functional.alltypestiny t
|
|
where a.id = t.id) limit 10995;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*RowsProduced: 10.99..\W10995\W
|
|
====
|
|
---- QUERY
|
|
# Test to verify that limit_ is correctly enforced when
|
|
# output_batch is at capacity (AtCapacity).
|
|
set batch_size=6;
|
|
set buffer_pool_limit=180m;
|
|
select * from tpch.lineitem t1 full outer join tpch.lineitem t2 on
|
|
t1.l_orderkey = t2.l_orderkey limit 10;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*RowsProduced: 10 .
|
|
====
|
|
---- QUERY
|
|
# IMPALA-4866: Hash join node does not apply limits correctly
|
|
# Test to ensure that the limit is correctly applied on a right
|
|
# join. Without this change this query returns 10 rows (batch_size) instead of 5.
|
|
set batch_size=10;
|
|
select straight_join t1.id, t2.id from functional.alltypes t1
|
|
right join functional.alltypes t2 on t1.id = t2.int_col + 100000
|
|
limit 5;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*RowsProduced: 5 .
|
|
====
|
|
---- QUERY
|
|
# IMPALA-4866: Hash join node does not apply limits correctly
|
|
# Test to ensure that the limit is correctly applied on an inner
|
|
# join. Without this change this query returns 10 rows (batch_size) instead of 5.
|
|
set batch_size=10;
|
|
select straight_join t1.id, t2.id from functional.alltypes t1
|
|
inner join functional.alltypes t2 on t1.id = t2.id limit 5;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*RowsProduced: 5 .
|
|
====
|