Files
impala/testdata/workloads/functional-query/queries/QueryTest/spilling-aggs.test
Tim Armstrong 8609b09a95 IMPALA-5681: release reservation from blocking operators
When an in-memory blocking aggregation or join is in the GetNext()
phase where it is outputting accumulated rows then we expect
memory consumption to monotonically decrease because no more
rows will be accumulated in memory.

This change adds support to release unused reservation and makes
use of it for in-memory aggregations and sorts.

We don't release memory for operators with spilled data, since they
may need the reservation to bring it back into memory. We also
don't release memory in subplans, since it will probably be used
in a later iteration of the subplan.

Testing:
Updated spilling test that now requires less memory.

Ran stress test binary search on tpch_parquet. No changes, except
Q18 now requires 325MB instead of 450MB to execute without spilling.

Ran query with two sorts in the same pipeline and watched /memz to
confirm that the first node in the pipeline was incrementally releasing
memory. Added a regression test based on this experiment.

Added a backend test to directly test reservation decreasing.

Change-Id: I6f4d0ad127d5fcd14b9821a7c127eec11d98692f
Reviewed-on: http://gerrit.cloudera.org:8080/7619
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Impala Public Jenkins
2017-08-17 20:17:48 +00:00

203 lines
4.8 KiB
Plaintext

====
---- QUERY
set buffer_pool_limit=10m;
select l_orderkey, count(*)
from lineitem
group by 1
order by 1 limit 10
---- RESULTS
1,6
2,1
3,6
4,1
5,3
6,1
7,7
32,6
33,4
34,3
---- TYPES
BIGINT, BIGINT
---- RUNTIME_PROFILE
# Verify that spilling and passthrough were activated.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Test query with string grouping column and string agg columns
set buffer_pool_limit=10m;
set num_nodes=1;
select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode)
from lineitem
group by 1,2
order by 1,2 limit 3
---- RESULTS
'A',3,0.05,'RAIL'
'A',5,0.03,'AIR'
'A',6,0.03,'TRUCK'
---- TYPES
STRING, BIGINT, DECIMAL, STRING
---- RUNTIME_PROFILE
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
====
---- QUERY
set buffer_pool_limit=10m;
select l_orderkey, count(*)
from lineitem
group by 1
order by 1 limit 10;
---- RESULTS
1,6
2,1
3,6
4,1
5,3
6,1
7,7
32,6
33,4
34,3
---- TYPES
BIGINT, BIGINT
---- RUNTIME_PROFILE
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Test query with string grouping column
set buffer_pool_limit=10m;
set num_nodes=1;
select l_comment, count(*)
from lineitem
group by 1
order by count(*) desc limit 5
---- RESULTS
' furiously',943
' carefully',893
' carefully ',875
'carefully ',854
' furiously ',845
---- TYPES
STRING, BIGINT
---- RUNTIME_PROFILE
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Test query with string grouping column and string agg columns
set buffer_pool_limit=10m;
set num_nodes=1;
select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode)
from lineitem
group by 1,2
order by 1,2 limit 3;
---- RESULTS
'A',3,0.05,'RAIL'
'A',5,0.03,'AIR'
'A',6,0.03,'TRUCK'
---- TYPES
STRING, BIGINT, DECIMAL, STRING
---- RUNTIME_PROFILE
# Verify that spilling happened in the agg.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Test with non-scalar intermediate state (avg() uses fixed intermediate value).
set buffer_pool_limit=10m;
select l_orderkey, avg(l_orderkey)
from lineitem
group by 1
order by 1 limit 5
---- RESULTS
1,1
2,2
3,3
4,4
5,5
---- TYPES
BIGINT, DOUBLE
---- RUNTIME_PROFILE
# Verify that passthrough and spilling happened in the pre and merge agg.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Test aggregation spill with group_concat distinct
set buffer_pool_limit=30m;
select l_orderkey, count(*), group_concat(distinct l_linestatus, '|')
from lineitem
group by 1
order by 1 limit 10
---- RESULTS
1,6,'O'
2,1,'O'
3,6,'F'
4,1,'O'
5,3,'F'
6,1,'F'
7,7,'O'
32,6,'O'
33,4,'F'
34,3,'O'
---- TYPES
BIGINT, BIGINT, STRING
---- RUNTIME_PROFILE
# Verify that at least one of the aggs spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Regression test for IMPALA-2612. The following query will cause CastToChar
# to be invoked when building the hash tables in partitioned aggregation
# nodes. CastToChar will do "local" memory allocation. Without the fix of
# IMPALA-2612, the peak memory consumption will be higher.
set mem_limit=800m;
set num_scanner_threads=1;
select count(distinct concat(cast(l_comment as char(120)), cast(l_comment as char(120)),
cast(l_comment as char(120)), cast(l_comment as char(120)),
cast(l_comment as char(120)), cast(l_comment as char(120))))
from lineitem
---- RESULTS
4502054
---- TYPES
BIGINT
---- RUNTIME_PROFILE
# Verify that the agg spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Test aggregation with minimum required reservation to exercise IMPALA-2708.
# Merge aggregation requires 17 buffers. The buffer size is 256k for this test.
set buffer_pool_limit=4352k;
select count(*)
from (select distinct * from orders) t
---- TYPES
BIGINT
---- RESULTS
1500000
---- RUNTIME_PROFILE
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# IMPALA-3304: test that avg() can spill with a query mem limit.
# This test only covers that use FIXED_UDA_INTERMEDIATE, not functions that allocate
# strings for intermediate values.
set mem_limit=200m;
select l_orderkey, avg(l_tax), avg(l_quantity), avg(l_discount), avg(l_extendedprice)
from tpch_parquet.lineitem
group by 1
order by avg(l_extendedprice) desc, avg(l_discount)
limit 5
---- TYPES
BIGINT,DECIMAL,DECIMAL,DECIMAL,DECIMAL
---- RESULTS
3811460,0.05,50.00,0.05,104899.50
1744195,0.04,50.00,0.09,104649.50
5151266,0.07,50.00,0.00,104449.50
4571042,0.03,50.00,0.09,104399.50
1198304,0.01,50.00,0.02,104299.50
---- RUNTIME_PROFILE
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====