mirror of
https://github.com/apache/impala.git
synced 2026-01-02 21:00:35 -05:00
When building hash tables for the build side in partitioned hash join or aggreagtion, we will evaluate the build or probe side expressions to compute the hash values for each TupleRow. Evaluation of certain expressions (e.g. CastToChar) requires "local" memory allocation. "Local" memory allocation is supposed to be freed after processing each row batch. However, the calls to free local allocations are missing in PartitionedHashJoinNode::BuildHashTableInternal() and PartitionedAggregationNode::ProcessStream(). This causes all "local" memory allocation to accumulate potentially for the entire duration of the query or until GetNext() is called. This may lead to unnecessary memory allocation failure as memory limit is exceeded. This patch calls ExecNode::FreeLocalAllocations() at least once per row-batch when building hash tables. It also adds the missing checks for the query status in the loop building hash tables. Please note that QueryMaintenance() isn't called due to its overhead in memory limit checks. Change-Id: Idbeab043a45b0aaf6b6a8c560882bd1474a1216d Reviewed-on: http://gerrit.cloudera.org:8080/1448 Reviewed-by: Michael Ho <kwho@cloudera.com> Tested-by: Internal Jenkins
356 lines
7.5 KiB
Plaintext
356 lines
7.5 KiB
Plaintext
====
|
|
---- QUERY
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=265m;
|
|
select l_orderkey, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10
|
|
---- RESULTS
|
|
1,6
|
|
2,1
|
|
3,6
|
|
4,1
|
|
5,3
|
|
6,1
|
|
7,7
|
|
32,6
|
|
33,4
|
|
34,3
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test query with string grouping column and string agg columns
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=275m;
|
|
select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode)
|
|
from lineitem
|
|
group by 1,2
|
|
order by 1,2 limit 3
|
|
---- RESULTS
|
|
'A',3,0.05,'RAIL'
|
|
'A',5,0.03,'AIR'
|
|
'A',6,0.03,'TRUCK'
|
|
---- TYPES
|
|
STRING, BIGINT, DECIMAL, STRING
|
|
====
|
|
---- QUERY
|
|
# TODO: the distributed agg tests are highly likely of becoming flaking. The memory is
|
|
# split between the local and merge agg in an uncontrolled way, which can lead to
|
|
# pathological cases causing the queries to fail with OOM. These cases are expected
|
|
# to be unlikely with reasonable limits but we are explicitly trying to pick very low
|
|
# limits (IMPALA-1305).
|
|
set max_block_mgr_memory=275m;
|
|
select l_orderkey, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10;
|
|
---- RESULTS
|
|
1,6
|
|
2,1
|
|
3,6
|
|
4,1
|
|
5,3
|
|
6,1
|
|
7,7
|
|
32,6
|
|
33,4
|
|
34,3
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test query with string grouping column
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=275m;
|
|
select l_comment, count(*)
|
|
from lineitem
|
|
group by 1
|
|
order by count(*) desc limit 5
|
|
---- RESULTS
|
|
' furiously',943
|
|
' carefully',893
|
|
' carefully ',875
|
|
'carefully ',854
|
|
' furiously ',845
|
|
---- TYPES
|
|
STRING, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Disabled due to IMPALA-1305.
|
|
# Test query with string grouping column and string agg columns
|
|
#set num_nodes=0;
|
|
#set max_block_mgr_memory=80m;
|
|
#select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode)
|
|
#from lineitem
|
|
#group by 1,2
|
|
#order by 1,2 limit 3;
|
|
#---- RESULTS
|
|
#'A',3,0.05,'RAIL'
|
|
#'A',5,0.03,'AIR'
|
|
#'A',6,0.03,'TRUCK'
|
|
#---- TYPES
|
|
#STRING, BIGINT, DOUBLE, STRING
|
|
#====
|
|
#
|
|
# Test query with string intermediate state.
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=275m;
|
|
select l_orderkey, avg(l_orderkey)
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 5
|
|
---- RESULTS
|
|
1,1
|
|
2,2
|
|
3,3
|
|
4,4
|
|
5,5
|
|
---- TYPES
|
|
BIGINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=100m;
|
|
select count(l1.l_tax)
|
|
from
|
|
lineitem l1,
|
|
lineitem l2,
|
|
lineitem l3
|
|
where
|
|
l1.l_tax < 0.01 and
|
|
l2.l_tax < 0.04 and
|
|
l1.l_orderkey = l2.l_orderkey and
|
|
l1.l_orderkey = l3.l_orderkey and
|
|
l1.l_comment = l3.l_comment and
|
|
l1.l_shipdate = l3.l_shipdate
|
|
---- RESULTS
|
|
1846743
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=40m;
|
|
select max(t1.total_count), max(t1.l_shipinstruct), max(t1.l_comment) from
|
|
(select l_shipinstruct, l_comment, count(*) over () total_count from lineitem) t1
|
|
---- RESULTS
|
|
6001215,'TAKE BACK RETURN','zzle? slyly final platelets sleep quickly. '
|
|
---- TYPES
|
|
BIGINT, STRING, STRING
|
|
---- QUERY
|
|
# Run this query with very low memory. Since the tables are small, the PA/PHJ should be
|
|
# using buffers much smaller than the io buffer.
|
|
set max_block_mgr_memory=10m;
|
|
select a.int_col, count(*)
|
|
from functional.alltypessmall a, functional.alltypessmall b, functional.alltypessmall c
|
|
where a.id = b.id and b.id = c.id group by a.int_col
|
|
---- RESULTS
|
|
0,12
|
|
1,12
|
|
2,12
|
|
3,12
|
|
4,12
|
|
5,8
|
|
6,8
|
|
7,8
|
|
8,8
|
|
9,8
|
|
---- TYPES
|
|
INT, BIGINT
|
|
====
|
|
---- QUERY: TPCH-Q21
|
|
# Adding TPCH-Q21 in the spilling test to check for IMPALA-1471 (spilling left anti
|
|
# and left outer joins were returning wrong results).
|
|
# Q21 - Suppliers Who Kept Orders Waiting Query
|
|
set num_nodes=0;
|
|
set max_block_mgr_memory=100m;
|
|
select
|
|
s_name,
|
|
count(*) as numwait
|
|
from
|
|
supplier,
|
|
lineitem l1,
|
|
orders,
|
|
nation
|
|
where
|
|
s_suppkey = l1.l_suppkey
|
|
and o_orderkey = l1.l_orderkey
|
|
and o_orderstatus = 'F'
|
|
and l1.l_receiptdate > l1.l_commitdate
|
|
and exists (
|
|
select
|
|
*
|
|
from
|
|
lineitem l2
|
|
where
|
|
l2.l_orderkey = l1.l_orderkey
|
|
and l2.l_suppkey <> l1.l_suppkey
|
|
)
|
|
and not exists (
|
|
select
|
|
*
|
|
from
|
|
lineitem l3
|
|
where
|
|
l3.l_orderkey = l1.l_orderkey
|
|
and l3.l_suppkey <> l1.l_suppkey
|
|
and l3.l_receiptdate > l3.l_commitdate
|
|
)
|
|
and s_nationkey = n_nationkey
|
|
and n_name = 'SAUDI ARABIA'
|
|
group by
|
|
s_name
|
|
order by
|
|
numwait desc,
|
|
s_name
|
|
limit 100
|
|
---- RESULTS
|
|
'Supplier#000002829',20
|
|
'Supplier#000005808',18
|
|
'Supplier#000000262',17
|
|
'Supplier#000000496',17
|
|
'Supplier#000002160',17
|
|
'Supplier#000002301',17
|
|
'Supplier#000002540',17
|
|
'Supplier#000003063',17
|
|
'Supplier#000005178',17
|
|
'Supplier#000008331',17
|
|
'Supplier#000002005',16
|
|
'Supplier#000002095',16
|
|
'Supplier#000005799',16
|
|
'Supplier#000005842',16
|
|
'Supplier#000006450',16
|
|
'Supplier#000006939',16
|
|
'Supplier#000009200',16
|
|
'Supplier#000009727',16
|
|
'Supplier#000000486',15
|
|
'Supplier#000000565',15
|
|
'Supplier#000001046',15
|
|
'Supplier#000001047',15
|
|
'Supplier#000001161',15
|
|
'Supplier#000001336',15
|
|
'Supplier#000001435',15
|
|
'Supplier#000003075',15
|
|
'Supplier#000003335',15
|
|
'Supplier#000005649',15
|
|
'Supplier#000006027',15
|
|
'Supplier#000006795',15
|
|
'Supplier#000006800',15
|
|
'Supplier#000006824',15
|
|
'Supplier#000007131',15
|
|
'Supplier#000007382',15
|
|
'Supplier#000008913',15
|
|
'Supplier#000009787',15
|
|
'Supplier#000000633',14
|
|
'Supplier#000001960',14
|
|
'Supplier#000002323',14
|
|
'Supplier#000002490',14
|
|
'Supplier#000002993',14
|
|
'Supplier#000003101',14
|
|
'Supplier#000004489',14
|
|
'Supplier#000005435',14
|
|
'Supplier#000005583',14
|
|
'Supplier#000005774',14
|
|
'Supplier#000007579',14
|
|
'Supplier#000008180',14
|
|
'Supplier#000008695',14
|
|
'Supplier#000009224',14
|
|
'Supplier#000000357',13
|
|
'Supplier#000000436',13
|
|
'Supplier#000000610',13
|
|
'Supplier#000000788',13
|
|
'Supplier#000000889',13
|
|
'Supplier#000001062',13
|
|
'Supplier#000001498',13
|
|
'Supplier#000002056',13
|
|
'Supplier#000002312',13
|
|
'Supplier#000002344',13
|
|
'Supplier#000002596',13
|
|
'Supplier#000002615',13
|
|
'Supplier#000002978',13
|
|
'Supplier#000003048',13
|
|
'Supplier#000003234',13
|
|
'Supplier#000003727',13
|
|
'Supplier#000003806',13
|
|
'Supplier#000004472',13
|
|
'Supplier#000005236',13
|
|
'Supplier#000005906',13
|
|
'Supplier#000006241',13
|
|
'Supplier#000006326',13
|
|
'Supplier#000006384',13
|
|
'Supplier#000006394',13
|
|
'Supplier#000006624',13
|
|
'Supplier#000006629',13
|
|
'Supplier#000006682',13
|
|
'Supplier#000006737',13
|
|
'Supplier#000006825',13
|
|
'Supplier#000007021',13
|
|
'Supplier#000007417',13
|
|
'Supplier#000007497',13
|
|
'Supplier#000007602',13
|
|
'Supplier#000008134',13
|
|
'Supplier#000008234',13
|
|
'Supplier#000009435',13
|
|
'Supplier#000009436',13
|
|
'Supplier#000009564',13
|
|
'Supplier#000009896',13
|
|
'Supplier#000000379',12
|
|
'Supplier#000000673',12
|
|
'Supplier#000000762',12
|
|
'Supplier#000000811',12
|
|
'Supplier#000000821',12
|
|
'Supplier#000001337',12
|
|
'Supplier#000001916',12
|
|
'Supplier#000001925',12
|
|
'Supplier#000002039',12
|
|
'Supplier#000002357',12
|
|
'Supplier#000002483',12
|
|
---- TYPES
|
|
string, bigint
|
|
====
|
|
---- QUERY
|
|
# Test aggregation spill with group_concat distinct
|
|
# TODO: get this to spill.
|
|
set num_nodes=1;
|
|
set max_block_mgr_memory=265m;
|
|
select l_orderkey, count(*), group_concat(distinct l_linestatus, '|')
|
|
from lineitem
|
|
group by 1
|
|
order by 1 limit 10
|
|
---- RESULTS
|
|
1,6,'O'
|
|
2,1,'O'
|
|
3,6,'F'
|
|
4,1,'O'
|
|
5,3,'F'
|
|
6,1,'F'
|
|
7,7,'O'
|
|
32,6,'O'
|
|
33,4,'F'
|
|
34,3,'O'
|
|
---- TYPES
|
|
BIGINT, BIGINT, STRING
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-2612. The following query will cause CastToChar
|
|
# to be invoked when building the hash tables in partitioned aggregation
|
|
# nodes. CastToChar will do "local" memory allocation. Without the fix of
|
|
# IMPALA-2612, the peak memory consumption will be higher.
|
|
set mem_limit=800m;
|
|
set num_nodes=1;
|
|
set num_scanner_threads=1;
|
|
select count(distinct concat(cast(l_comment as char(120)), cast(l_comment as char(120)),
|
|
cast(l_comment as char(120)), cast(l_comment as char(120)),
|
|
cast(l_comment as char(120)), cast(l_comment as char(120))))
|
|
from lineitem
|
|
---- RESULTS
|
|
4502054
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
set mem_limit=0;
|
|
set num_nodes=0;
|
|
==== |