mirror of
https://github.com/apache/impala.git
synced 2026-01-05 12:01:11 -05:00
Sorter can have runaway memory consumption as it never frees local allocations made in comparator_.Less(). In addition, it doesn't check for errors generated during expression evaluation so it may keep sorting even after failures have occurred. This change fixes the problem by freeing local allocations for every n invocations of comparator_.Less() where n is the row batch size specified in the query options. Various error checks are also added to return early if any error is encountered. Change-Id: I941729b4836e5dbb827d4313a0b45bc5df2fa8e1 Reviewed-on: http://gerrit.cloudera.org:8080/3116 Reviewed-by: Michael Ho <kwho@cloudera.com> Tested-by: Internal Jenkins
587 lines
18 KiB
Plaintext
587 lines
18 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Test aggregation that groups lineitem by l_orderkey and counts rows per group, with
# max_block_mgr_memory set to 25m. Only the first 10 groups by l_orderkey are returned.
set max_block_mgr_memory=25m;
|
|
select l_orderkey, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10
|
|
---- RESULTS
|
|
1,6
|
|
2,1
|
|
3,6
|
|
4,1
|
|
5,3
|
|
6,1
|
|
7,7
|
|
32,6
|
|
33,4
|
|
34,3
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
---- RUNTIME_PROFILE
|
|
# Verify that spilling and passthrough were activated.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test query with string grouping column and string agg columns
# (groups by l_returnflag and l_orderkey; min(l_shipmode) is the string aggregate).
# avg(l_tax) is selected unrounded here; another test in this file runs the same
# query with round(avg(l_tax),2).
|
|
# Could only get it to spill reliably with num_nodes=1.
|
|
# TODO: revisit with new buffer pool.
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=25m;
|
|
select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode)
|
|
from lineitem
|
|
group by 1,2
|
|
order by 1,2 limit 3
|
|
---- RESULTS
|
|
'A',3,0.05,'RAIL'
|
|
'A',5,0.03,'AIR'
|
|
'A',6,0.03,'TRUCK'
|
|
---- TYPES
|
|
STRING, BIGINT, DECIMAL, STRING
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test aggregation that groups lineitem by l_orderkey and counts rows per group, with
# max_block_mgr_memory set to 25m. Same statements as the first test in this file,
# except that the select is terminated with ';'.
set max_block_mgr_memory=25m;
|
|
select l_orderkey, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10;
|
|
---- RESULTS
|
|
1,6
|
|
2,1
|
|
3,6
|
|
4,1
|
|
5,3
|
|
6,1
|
|
7,7
|
|
32,6
|
|
33,4
|
|
34,3
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test query with string grouping column
# (groups by l_comment and returns the 5 groups with the highest count(*)).
|
|
# Could only get it to spill reliably with num_nodes=1.
|
|
# TODO: revisit with new buffer pool.
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=25m;
|
|
select l_comment, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by count(*) desc limit 5
|
|
---- RESULTS
|
|
' furiously',943
|
|
' carefully',893
|
|
' carefully ',875
|
|
'carefully ',854
|
|
' furiously ',845
|
|
---- TYPES
|
|
STRING, BIGINT
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test query with string grouping column and string agg columns
# (groups by l_returnflag and l_orderkey; min(l_shipmode) is the string aggregate).
# avg(l_tax) is rounded to 2 decimal places before being compared with the results.
|
|
# Could only get it to spill reliably with num_nodes=1.
|
|
# TODO: revisit with new buffer pool.
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=25m;
|
|
select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode)
|
|
from lineitem
|
|
group by 1,2
|
|
order by 1,2 limit 3;
|
|
---- RESULTS
|
|
'A',3,0.05,'RAIL'
|
|
'A',5,0.03,'AIR'
|
|
'A',6,0.03,'TRUCK'
|
|
---- TYPES
|
|
STRING, BIGINT, DECIMAL, STRING
|
|
---- RUNTIME_PROFILE
|
|
# Verify that spilling happened in the agg.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test with string intermediate state (avg() uses string intermediate value).
# Groups lineitem by l_orderkey with max_block_mgr_memory set to 25m and returns the
# first 5 groups.
|
|
set max_block_mgr_memory=25m;
|
|
select l_orderkey, avg(l_orderkey)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 5
|
|
---- RESULTS
|
|
1,1
|
|
2,2
|
|
3,3
|
|
4,4
|
|
5,5
|
|
---- TYPES
|
|
BIGINT, DOUBLE
|
|
---- RUNTIME_PROFILE
|
|
# Verify that passthrough and spilling happened in the pre and merge agg.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
|
|
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test a three-way self-join of lineitem with max_block_mgr_memory set to 100m.
# l1 is joined to l2 on l_orderkey and to l3 on l_orderkey, l_comment and l_shipdate;
# l1 and l2 are additionally filtered on l_tax.
set num_nodes=0;
|
|
set max_block_mgr_memory=100m;
|
|
select count(l1.l_tax)
|
|
from
|
|
lineitem l1,
|
|
lineitem l2,
|
|
lineitem l3
|
|
where
|
|
l1.l_tax < 0.01 and
|
|
l2.l_tax < 0.04 and
|
|
l1.l_orderkey = l2.l_orderkey and
|
|
l1.l_orderkey = l3.l_orderkey and
|
|
l1.l_comment = l3.l_comment and
|
|
l1.l_shipdate = l3.l_shipdate
|
|
---- RESULTS
|
|
1846743
|
|
---- TYPES
|
|
BIGINT
|
|
---- RUNTIME_PROFILE
|
|
# Verify that at least one of the joins was spilled.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test an analytic function with an empty over() clause (count(*) over ()) evaluated on
# lineitem with max_block_mgr_memory set to 40m. The outer query reduces the inline
# view's output to a single row with max().
set num_nodes=0;
|
|
set max_block_mgr_memory=40m;
|
|
select max(t1.total_count), max(t1.l_shipinstruct), max(t1.l_comment) from
|
|
(select l_shipinstruct, l_comment, count(*) over () total_count from lineitem) t1
|
|
---- RESULTS
|
|
6001215,'TAKE BACK RETURN','zzle? slyly final platelets sleep quickly. '
|
|
---- TYPES
|
|
BIGINT, STRING, STRING
|
|
---- RUNTIME_PROFILE
|
|
# Indirectly verify that the analytic spilled: if it spills a block, it must repin it.
|
|
row_regex: .*PinTime: [1-9][0-9]*.*
|
|
====
|
|
---- QUERY
|
|
# Run this query with very low memory. Since the tables are small, the PA/PHJ should be
|
|
# using buffers much smaller than the io buffer.
# NOTE(review): PA/PHJ presumably stands for the partitioned aggregation and partitioned
# hash join nodes - confirm.
# The query self-joins functional.alltypessmall three times on id and groups by int_col.
|
|
set max_block_mgr_memory=10m;
|
|
select a.int_col, count(*)
|
|
from functional.alltypessmall a, functional.alltypessmall b, functional.alltypessmall c
|
|
where a.id = b.id and b.id = c.id group by a.int_col
|
|
---- RESULTS
|
|
0,12
|
|
1,12
|
|
2,12
|
|
3,12
|
|
4,12
|
|
5,8
|
|
6,8
|
|
7,8
|
|
8,8
|
|
9,8
|
|
---- TYPES
|
|
INT, BIGINT
|
|
---- RUNTIME_PROFILE
|
|
# This query is not meant to spill.
|
|
row_regex: .*SpilledPartitions: 0 .*
|
|
====
|
|
---- QUERY: TPCH-Q21
|
|
# Adding TPCH-Q21 in the spilling test to check for IMPALA-1471 (spilling left anti
|
|
# and left outer joins were returning wrong results).
|
|
# Q21 - Suppliers Who Kept Orders Waiting Query
# NOTE(review): the exists / not exists subqueries over lineitem (l2, l3) are presumably
# the parts of the query that exercise the join types named above - confirm.
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=65m;
|
|
select
|
|
s_name,
|
|
count(*) as numwait
|
|
from
|
|
supplier,
|
|
lineitem l1,
|
|
orders,
|
|
nation
|
|
where
|
|
s_suppkey = l1.l_suppkey
|
|
and o_orderkey = l1.l_orderkey
|
|
and o_orderstatus = 'F'
|
|
and l1.l_receiptdate > l1.l_commitdate
|
|
and exists (
|
|
select
|
|
*
|
|
from
|
|
lineitem l2
|
|
where
|
|
l2.l_orderkey = l1.l_orderkey
|
|
and l2.l_suppkey <> l1.l_suppkey
|
|
)
|
|
and not exists (
|
|
select
|
|
*
|
|
from
|
|
lineitem l3
|
|
where
|
|
l3.l_orderkey = l1.l_orderkey
|
|
and l3.l_suppkey <> l1.l_suppkey
|
|
and l3.l_receiptdate > l3.l_commitdate
|
|
)
|
|
and s_nationkey = n_nationkey
|
|
and n_name = 'SAUDI ARABIA'
|
|
group by
|
|
s_name
|
|
order by
|
|
numwait desc,
|
|
s_name
|
|
limit 100
|
|
---- RESULTS
|
|
'Supplier#000002829',20
|
|
'Supplier#000005808',18
|
|
'Supplier#000000262',17
|
|
'Supplier#000000496',17
|
|
'Supplier#000002160',17
|
|
'Supplier#000002301',17
|
|
'Supplier#000002540',17
|
|
'Supplier#000003063',17
|
|
'Supplier#000005178',17
|
|
'Supplier#000008331',17
|
|
'Supplier#000002005',16
|
|
'Supplier#000002095',16
|
|
'Supplier#000005799',16
|
|
'Supplier#000005842',16
|
|
'Supplier#000006450',16
|
|
'Supplier#000006939',16
|
|
'Supplier#000009200',16
|
|
'Supplier#000009727',16
|
|
'Supplier#000000486',15
|
|
'Supplier#000000565',15
|
|
'Supplier#000001046',15
|
|
'Supplier#000001047',15
|
|
'Supplier#000001161',15
|
|
'Supplier#000001336',15
|
|
'Supplier#000001435',15
|
|
'Supplier#000003075',15
|
|
'Supplier#000003335',15
|
|
'Supplier#000005649',15
|
|
'Supplier#000006027',15
|
|
'Supplier#000006795',15
|
|
'Supplier#000006800',15
|
|
'Supplier#000006824',15
|
|
'Supplier#000007131',15
|
|
'Supplier#000007382',15
|
|
'Supplier#000008913',15
|
|
'Supplier#000009787',15
|
|
'Supplier#000000633',14
|
|
'Supplier#000001960',14
|
|
'Supplier#000002323',14
|
|
'Supplier#000002490',14
|
|
'Supplier#000002993',14
|
|
'Supplier#000003101',14
|
|
'Supplier#000004489',14
|
|
'Supplier#000005435',14
|
|
'Supplier#000005583',14
|
|
'Supplier#000005774',14
|
|
'Supplier#000007579',14
|
|
'Supplier#000008180',14
|
|
'Supplier#000008695',14
|
|
'Supplier#000009224',14
|
|
'Supplier#000000357',13
|
|
'Supplier#000000436',13
|
|
'Supplier#000000610',13
|
|
'Supplier#000000788',13
|
|
'Supplier#000000889',13
|
|
'Supplier#000001062',13
|
|
'Supplier#000001498',13
|
|
'Supplier#000002056',13
|
|
'Supplier#000002312',13
|
|
'Supplier#000002344',13
|
|
'Supplier#000002596',13
|
|
'Supplier#000002615',13
|
|
'Supplier#000002978',13
|
|
'Supplier#000003048',13
|
|
'Supplier#000003234',13
|
|
'Supplier#000003727',13
|
|
'Supplier#000003806',13
|
|
'Supplier#000004472',13
|
|
'Supplier#000005236',13
|
|
'Supplier#000005906',13
|
|
'Supplier#000006241',13
|
|
'Supplier#000006326',13
|
|
'Supplier#000006384',13
|
|
'Supplier#000006394',13
|
|
'Supplier#000006624',13
|
|
'Supplier#000006629',13
|
|
'Supplier#000006682',13
|
|
'Supplier#000006737',13
|
|
'Supplier#000006825',13
|
|
'Supplier#000007021',13
|
|
'Supplier#000007417',13
|
|
'Supplier#000007497',13
|
|
'Supplier#000007602',13
|
|
'Supplier#000008134',13
|
|
'Supplier#000008234',13
|
|
'Supplier#000009435',13
|
|
'Supplier#000009436',13
|
|
'Supplier#000009564',13
|
|
'Supplier#000009896',13
|
|
'Supplier#000000379',12
|
|
'Supplier#000000673',12
|
|
'Supplier#000000762',12
|
|
'Supplier#000000811',12
|
|
'Supplier#000000821',12
|
|
'Supplier#000001337',12
|
|
'Supplier#000001916',12
|
|
'Supplier#000001925',12
|
|
'Supplier#000002039',12
|
|
'Supplier#000002357',12
|
|
'Supplier#000002483',12
|
|
---- TYPES
|
|
string, bigint
|
|
---- RUNTIME_PROFILE
|
|
# Verify that at least one of the joins was spilled.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test aggregation spill with group_concat distinct
# (distinct l_linestatus values per l_orderkey, joined with '|', alongside count(*)).
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=100m;
|
|
select l_orderkey, count(*), group_concat(distinct l_linestatus, '|')
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10
|
|
---- RESULTS
|
|
1,6,'O'
|
|
2,1,'O'
|
|
3,6,'F'
|
|
4,1,'O'
|
|
5,3,'F'
|
|
6,1,'F'
|
|
7,7,'O'
|
|
32,6,'O'
|
|
33,4,'F'
|
|
34,3,'O'
|
|
---- TYPES
|
|
BIGINT, BIGINT, STRING
|
|
---- RUNTIME_PROFILE
|
|
# Verify that at least one of the aggs spilled.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-2612. The following query will cause CastToChar
|
|
# to be invoked when building the hash tables in partitioned aggregation
|
|
# nodes. CastToChar will do "local" memory allocation. Without the fix of
|
|
# IMPALA-2612, the peak memory consumption will be higher.
# The distinct key is a concat() of six cast(l_comment as char(120)) expressions.
|
|
set mem_limit=800m;
|
|
set num_nodes=1;
|
|
set num_scanner_threads=1;
|
|
select count(distinct concat(cast(l_comment as char(120)), cast(l_comment as char(120)),
|
|
cast(l_comment as char(120)), cast(l_comment as char(120)),
|
|
cast(l_comment as char(120)), cast(l_comment as char(120))))
|
|
from lineitem
|
|
---- RESULTS
|
|
4502054
|
|
---- TYPES
|
|
BIGINT
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the agg spilled.
|
|
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test sort with inlined char column materialized by exprs.
# The sort key is the expression cast(l_comment as char(50)) in the select list.
|
|
# Set low memory limit to force spilling.
|
|
# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption.
# NOTE(review): disable_outermost_topn=1 presumably keeps the order by + limit from
# being planned as a top-n so that a full sort runs - confirm.
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=4m;
|
|
set mem_limit=200m;
|
|
set disable_outermost_topn=1;
|
|
select cast(l_comment as char(50))
|
|
from lineitem
|
|
order by 1
|
|
limit 20;
|
|
---- RESULTS
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias about the en '
|
|
' Tiresias about the slyly ironic dinos ca '
|
|
' Tiresias about the slyly unus '
|
|
' Tiresias above '
|
|
' Tiresias above the fox '
|
|
' Tiresias above the furiously final th '
|
|
' Tiresias above the slyly expr '
|
|
' Tiresias above the stealthily p '
|
|
---- TYPES
|
|
CHAR
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the sort actually spilled
|
|
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test sort with input inlined char column materialized before sort.
# The char(50) cast happens in the inline view, and the outer query sorts on its output.
# Uses the same max_block_mgr_memory=4m / mem_limit=200m settings as the other sort
# tests in this file.
# NOTE(review): disable_outermost_topn=1 presumably keeps the order by + limit from
# being planned as a top-n so that a full sort runs - confirm.
|
|
set num_nodes=0;
|
|
set mem_limit=200m;
|
|
set max_block_mgr_memory=4m;
|
|
set disable_outermost_topn=1;
|
|
select char_col
|
|
from (select cast(l_comment as char(50)) char_col
|
|
from lineitem) subquery
|
|
order by 1
|
|
limit 20;
|
|
---- RESULTS
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias about the en '
|
|
' Tiresias about the slyly ironic dinos ca '
|
|
' Tiresias about the slyly unus '
|
|
' Tiresias above '
|
|
' Tiresias above the fox '
|
|
' Tiresias above the furiously final th '
|
|
' Tiresias above the slyly expr '
|
|
' Tiresias above the stealthily p '
|
|
---- TYPES
|
|
CHAR
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the sort actually spilled
|
|
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test sort with input non-inlined char column materialized before sort.
# The inline view casts to char(200), whereas the inlined-char sort tests in this file
# cast to char(50).
|
|
# Set low memory limit to force spilling.
# NOTE(review): disable_outermost_topn=1 presumably keeps the order by + limit from
# being planned as a top-n so that a full sort runs - confirm.
|
|
set num_nodes=0;
|
|
set mem_limit=200m;
|
|
set max_block_mgr_memory=4m;
|
|
set disable_outermost_topn=1;
|
|
select char_col
|
|
from (select cast(l_comment as char(200)) char_col
|
|
from lineitem) subquery
|
|
order by 1
|
|
limit 20;
|
|
---- RESULTS
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias about the en '
|
|
' Tiresias about the slyly ironic dinos ca '
|
|
' Tiresias about the slyly unus '
|
|
' Tiresias above '
|
|
' Tiresias above the fox '
|
|
' Tiresias above the furiously final th '
|
|
' Tiresias above the slyly expr '
|
|
' Tiresias above the stealthily p '
|
|
---- TYPES
|
|
CHAR
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the sort actually spilled
|
|
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test sort with varchar column materialized by exprs.
# The sort key is the expression cast(l_comment as varchar(50)) in the select list.
|
|
# Set low memory limit to force spilling.
# NOTE(review): disable_outermost_topn=1 presumably keeps the order by + limit from
# being planned as a top-n so that a full sort runs - confirm.
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=4m;
|
|
# IMPALA-3332: comparator makes local allocations that cause runaway memory consumption.
|
|
set mem_limit=200m;
|
|
set disable_outermost_topn=1;
|
|
select cast(l_comment as varchar(50))
|
|
from lineitem
|
|
order by 1
|
|
limit 20;
|
|
---- RESULTS
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias about the en'
|
|
' Tiresias about the slyly ironic dinos ca'
|
|
' Tiresias about the slyly unus'
|
|
' Tiresias above'
|
|
' Tiresias above the fox'
|
|
' Tiresias above the furiously final th'
|
|
' Tiresias above the slyly expr'
|
|
' Tiresias above the stealthily p'
|
|
---- TYPES
|
|
STRING
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the sort actually spilled
|
|
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
|
|
====
|
|
---- QUERY
|
|
# Test sort with input varchar column materialized before sort.
# The varchar(50) cast happens in the inline view, and the outer query sorts on its
# output.
|
|
# Set low memory limit to force spilling.
# NOTE(review): disable_outermost_topn=1 presumably keeps the order by + limit from
# being planned as a top-n so that a full sort runs - confirm.
|
|
set num_nodes=0;
|
|
set mem_limit=200m;
|
|
set max_block_mgr_memory=4m;
|
|
set disable_outermost_topn=1;
|
|
select char_col
|
|
from (select cast(l_comment as varchar(50)) char_col
|
|
from lineitem) subquery
|
|
order by 1
|
|
limit 20;
|
|
---- RESULTS
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias '
|
|
' Tiresias about the en'
|
|
' Tiresias about the slyly ironic dinos ca'
|
|
' Tiresias about the slyly unus'
|
|
' Tiresias above'
|
|
' Tiresias above the fox'
|
|
' Tiresias above the furiously final th'
|
|
' Tiresias above the slyly expr'
|
|
' Tiresias above the stealthily p'
|
|
---- TYPES
|
|
STRING
|
|
---- RUNTIME_PROFILE
|
|
# Verify that the sort actually spilled
|
|
row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
|
|
====
|