mirror of
https://github.com/apache/impala.git
synced 2026-01-02 12:00:33 -05:00
Before this change the preaggregation was frequently disabled when running under some memory pressure, e.g. if the aggregation is at the end of a pipeline of joins and those joins eat up all the memory. This can result in huge performance degradation since all rows must then be sent over the network. This change always reserves 16 * (buffer size + 64kb) bytes per preaggregation so that it is always able to build some hash tables and reduce the input somewhat. This has two parts: * Changing the frontend reservation calculation * Removing dead code in the backend that handled the case when the initial partitions and hash tables could not be allocated. Testing: Passes exhaustive tests. Change-Id: I2b544f9ec1a906719e2bbb074743926b95a3a573 Reviewed-on: http://gerrit.cloudera.org:8080/7739 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Impala Public Jenkins
205 lines
4.9 KiB
Plaintext
205 lines
4.9 KiB
Plaintext
====
|
|
---- QUERY
|
|
set buffer_pool_limit=10m;
|
|
select l_orderkey, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10
|
|
---- RESULTS
|
|
1,6
|
|
2,1
|
|
3,6
|
|
4,1
|
|
5,3
|
|
6,1
|
|
7,7
|
|
32,6
|
|
33,4
|
|
34,3
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
---- RUNTIME_PROFILE
|
|
# Verify that spilling and passthrough were activated.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test query with string grouping column and string agg columns
|
|
set buffer_pool_limit=10m;
|
|
set num_nodes=1;
|
|
select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode)
|
|
from lineitem
|
|
group by 1,2
|
|
order by 1,2 limit 3
|
|
---- RESULTS
|
|
'A',3,0.05,'RAIL'
|
|
'A',5,0.03,'AIR'
|
|
'A',6,0.03,'TRUCK'
|
|
---- TYPES
|
|
STRING, BIGINT, DECIMAL, STRING
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
set buffer_pool_limit=10m;
|
|
select l_orderkey, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10;
|
|
---- RESULTS
|
|
1,6
|
|
2,1
|
|
3,6
|
|
4,1
|
|
5,3
|
|
6,1
|
|
7,7
|
|
32,6
|
|
33,4
|
|
34,3
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test query with string grouping column
|
|
set buffer_pool_limit=10m;
|
|
set num_nodes=1;
|
|
select l_comment, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by count(*) desc limit 5
|
|
---- RESULTS
|
|
' furiously',943
|
|
' carefully',893
|
|
' carefully ',875
|
|
'carefully ',854
|
|
' furiously ',845
|
|
---- TYPES
|
|
STRING, BIGINT
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test query with string grouping column and string agg columns
|
|
set buffer_pool_limit=10m;
|
|
set num_nodes=1;
|
|
select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode)
|
|
from lineitem
|
|
group by 1,2
|
|
order by 1,2 limit 3;
|
|
---- RESULTS
|
|
'A',3,0.05,'RAIL'
|
|
'A',5,0.03,'AIR'
|
|
'A',6,0.03,'TRUCK'
|
|
---- TYPES
|
|
STRING, BIGINT, DECIMAL, STRING
|
|
---- RUNTIME_PROFILE
|
|
# Verify that spilling happened in the agg.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test with non-scalar intermediate state (avg() uses fixed intermediate value).
|
|
set buffer_pool_limit=10m;
|
|
select l_orderkey, avg(l_orderkey)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 5
|
|
---- RESULTS
|
|
1,1
|
|
2,2
|
|
3,3
|
|
4,4
|
|
5,5
|
|
---- TYPES
|
|
BIGINT, DOUBLE
|
|
---- RUNTIME_PROFILE
|
|
# Verify that passthrough and spilling happened in the pre and merge agg.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test aggregation spill with group_concat distinct
|
|
set buffer_pool_limit=30m;
|
|
select l_orderkey, count(*), group_concat(distinct l_linestatus, '|')
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10
|
|
---- RESULTS
|
|
1,6,'O'
|
|
2,1,'O'
|
|
3,6,'F'
|
|
4,1,'O'
|
|
5,3,'F'
|
|
6,1,'F'
|
|
7,7,'O'
|
|
32,6,'O'
|
|
33,4,'F'
|
|
34,3,'O'
|
|
---- TYPES
|
|
BIGINT, BIGINT, STRING
|
|
---- RUNTIME_PROFILE
|
|
# Verify that at least one of the aggs spilled.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-2612. The following query will cause CastToChar
|
|
# to be invoked when building the hash tables in partitioned aggregation
|
|
# nodes. CastToChar will do "local" memory allocation. Without the fix of
|
|
# IMPALA-2612, the peak memory consumption will be higher.
|
|
set mem_limit=800m;
|
|
set num_scanner_threads=1;
|
|
select count(distinct concat(cast(l_comment as char(120)), cast(l_comment as char(120)),
|
|
cast(l_comment as char(120)), cast(l_comment as char(120)),
|
|
cast(l_comment as char(120)), cast(l_comment as char(120))))
|
|
from lineitem
|
|
---- RESULTS
|
|
4502054
|
|
---- TYPES
|
|
BIGINT
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the agg spilled.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test aggregation with minimum required reservation to exercise IMPALA-2708.
|
|
# Merge aggregation requires 17 buffers and preaggregation requires 16 buffers
|
|
# plus 1mb of hash tables. The buffer size is 256k for this test.
|
|
set max_row_size=256k;
|
|
set buffer_pool_limit=9472k;
|
|
select count(*)
|
|
from (select distinct * from orders) t
|
|
---- TYPES
|
|
BIGINT
|
|
---- RESULTS
|
|
1500000
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3304: test that avg() can spill with a query mem limit.
|
|
# This test only covers that use FIXED_UDA_INTERMEDIATE, not functions that allocate
|
|
# strings for intermediate values.
|
|
set mem_limit=150m;
|
|
select l_orderkey, avg(l_tax), avg(l_quantity), avg(l_discount), avg(l_extendedprice)
|
|
from tpch_parquet.lineitem
|
|
group by 1
|
|
order by avg(l_extendedprice) desc, avg(l_discount)
|
|
limit 5
|
|
---- TYPES
|
|
BIGINT,DECIMAL,DECIMAL,DECIMAL,DECIMAL
|
|
---- RESULTS
|
|
3811460,0.05,50.00,0.05,104899.50
|
|
1744195,0.04,50.00,0.09,104649.50
|
|
5151266,0.07,50.00,0.00,104449.50
|
|
4571042,0.03,50.00,0.09,104399.50
|
|
1198304,0.01,50.00,0.02,104299.50
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
====
|