impala/testdata/workloads/functional-query/queries/QueryTest/spilling-large-rows.test
Commit e0a6e942b2 by stiga-huang:
IMPALA-9955,IMPALA-9957: Fix not enough reservation for large pages in GroupingAggregator
The minimum reservation requirement for a spillable operator is
((min_buffers - 2) * default_buffer_size) + 2 * max_row_size. In the
min reservation, we only reserve space for two large pages: one for
reading and one for writing. However, to make the non-streaming
GroupingAggregator work correctly, we have to manage these extra
reservations carefully, so it won't run out of the min reservation
when it actually needs to spill or read a large page.
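
For illustration (the buffer count here is arbitrary): with
min_buffers = 16, default_buffer_size = 2 MB and max_row_size = 10 MB
(as set in the tests below), the minimum requirement works out to
(16 - 2) * 2 MB + 2 * 10 MB = 28 MB + 20 MB = 48 MB, of which only the
last 20 MB covers the single large read page and the single large
write page.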

To be specific, the large write page reservation is managed as
follows, depending on whether needs_serialize is true or false:
- If the aggregator needs to serialize the intermediate results when
  spilling a partition, we have to save a large page worth of
  reservation for the serialize stream, in case it needs to write large
  rows. This space can be restored once all the partitions are spilled,
  since the serialize stream is then not needed until we
  build/repartition a spilled partition and thus have pinned partitions
  again. If the large write page reservation has been used, we save it
  back whenever possible after we spill or close a partition.
- If the aggregator doesn't need the serialize stream at all, we can
  restore the large write page reservation whenever we fail to add a
  large row, before spilling any partitions. We reclaim it whenever
  possible after we spill or close a partition.
A special case is when we are processing a large row that is the last
row in building/repartitioning a spilled partition: the large write
page reservation can be restored for it regardless of whether we need
the serialize stream, because the partitions will be read out after
this, so there is no further need for spilling.
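
Below is a minimal sketch of this write-side bookkeeping. The class
and method names are hypothetical and only illustrate the rules above;
they are not Impala's actual GroupingAggregator API:

  // Illustrative only; hypothetical names, not real Impala classes.
  #include <algorithm>
  #include <cstdint>

  class LargeWritePageTracker {
   public:
    LargeWritePageTracker(int64_t max_row_size, bool needs_serialize)
      : max_row_size_(max_row_size),
        needs_serialize_(needs_serialize),
        saved_reservation_(max_row_size) {}

    // Called when appending a large row fails for lack of reservation.
    // Returns how many bytes of the saved reservation may be released
    // to retry the append (0 if the space must stay saved for the
    // serialize stream).
    int64_t TryRestore(bool all_partitions_spilled,
                       bool is_last_row_of_spilled_input) {
      bool safe = !needs_serialize_          // no serialize stream at all
          || all_partitions_spilled          // stream unused until repartition
          || is_last_row_of_spilled_input;   // no more spilling after this row
      if (!safe) return 0;
      int64_t restored = saved_reservation_;
      saved_reservation_ = 0;
      return restored;
    }

    // Called after spilling or closing a partition: save unused
    // reservation back, up to one large page, so a later large write
    // (e.g. by the serialize stream) is still covered.
    void SaveBack(int64_t unused_reservation) {
      int64_t need = max_row_size_ - saved_reservation_;
      saved_reservation_ += std::min(need, unused_reservation);
    }

   private:
    int64_t max_row_size_;
    bool needs_serialize_;
    int64_t saved_reservation_;
  };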

For the large read page reservation, it's transferred to the spilled
BufferedTupleStream that we are reading when building/repartitioning a
spilled partition. The stream restores some of it when reading a large
page and reclaims it when the output row batch is reset. Note that the
stream is read in attach_on_read mode, so the large page is attached
to the row batch's buffers and only gets freed when the row batch is
reset.
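
A similarly simplified sketch of the read-side handoff, again with
hypothetical names rather than the real BufferedTupleStream interface:

  // Illustrative only; hypothetical names, not real Impala classes.
  #include <algorithm>
  #include <cstdint>

  class LargeReadPageReservation {
   public:
    explicit LargeReadPageReservation(int64_t max_row_size)
      : available_(max_row_size) {}

    // Reading a large page from the spilled stream: hand out bytes
    // from the reservation lent by the aggregator.
    int64_t Borrow(int64_t bytes_needed) {
      int64_t granted = std::min(bytes_needed, available_);
      available_ -= granted;
      return granted;
    }

    // The output row batch that the page was attached to
    // (attach_on_read mode) has been reset, so the page's buffer is
    // freed and the borrowed reservation can be reclaimed.
    void Reclaim(int64_t bytes) { available_ += bytes; }

   private:
    int64_t available_;  // lent by the aggregator, in bytes
  };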

Tests:
- Add tests in test_spilling_large_rows (test_spilling.py) with
  different row sizes to reproduce the issue.
- One test in test_spilling_no_debug_action becomes flaky after this
  patch. Revise the query to make the UDF allocate larger strings so
  it passes consistently.
- Run CORE tests.

Change-Id: I3d9c3a2e7f0da60071b920dec979729e86459775
Reviewed-on: http://gerrit.cloudera.org:8080/16240
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>

====
---- QUERY
# Create a temporary table with 10MB strings for the following tests.
# Need to bump memory estimate to prevent running out of memory.
set mem_limit="1.2gb";
create table bigstrs stored as parquet as
select *, repeat(string_col, 10000000) as bigstr
from functional.alltypes
order by id
limit 25
---- RESULTS
'Inserted 25 row(s)'
====
---- QUERY
# Row is too big to process in agg.
select id, count(distinct bigstr)
from bigstrs
group by id
---- CATCH
Row of size 9.54 MB could not be materialized by AGGREGATION_NODE
====
---- QUERY
# Agg should be able to process the large strings if we increase the row size.
set max_row_size=10m;
select id, count(distinct bigstr)
from bigstrs
group by id
order by id
---- TYPES
int,bigint
---- RESULTS
0,1
1,1
2,1
3,1
4,1
5,1
6,1
7,1
8,1
9,1
10,1
11,1
12,1
13,1
14,1
15,1
16,1
17,1
18,1
19,1
20,1
21,1
22,1
23,1
24,1
====
---- QUERY
# Row is too big to process in right side of hash join.
set mem_limit="1gb";
select straight_join atp.id, bs.id, atp.string_col
from functional.alltypes atp
join bigstrs bs on repeat(atp.string_col, 10000) = substring(bs.bigstr, 5000000, 10000) and atp.id = bs.id
where atp.id < 100
---- CATCH
Row of size 9.54 MB could not be materialized by Hash Join Builder (join_node_id=2). Increase the max_row_size query option (currently 512.00 KB) to process larger rows.
====
---- QUERY
# Row is too big to process in right side of hash join.
set mem_limit="1gb";
set max_row_size=18m;
select straight_join atp.id, bs.id, atp.string_col
from functional.alltypes atp
join bigstrs bs on repeat(atp.string_col, 10000) = substring(bs.bigstr, 5000000, 10000) and atp.id = bs.id
where atp.id < 100
---- TYPES
int,int,string
---- RESULTS
0,0,'0'
1,1,'1'
2,2,'2'
3,3,'3'
4,4,'4'
5,5,'5'
6,6,'6'
7,7,'7'
8,8,'8'
9,9,'9'
10,10,'0'
11,11,'1'
12,12,'2'
13,13,'3'
14,14,'4'
15,15,'5'
16,16,'6'
17,17,'7'
18,18,'8'
19,19,'9'
20,20,'0'
21,21,'1'
22,22,'2'
23,23,'3'
24,24,'4'
====
---- QUERY
# Row is too big to process in sort.
select id, substr(bigstr, 1, 5)
from bigstrs
order by bigstr, id
---- CATCH
Row of size 9.54 MB could not be materialized by SORT_NODE (id=1). Increase the max_row_size query option (currently 512.00 KB) to process larger rows.
====
---- QUERY
# Sort should be able to process the large strings if we increase the row size.
set max_row_size=10m;
select id, substr(bigstr, 1, 5)
from bigstrs
where id < 15
order by bigstr, id
---- TYPES
int,string
---- RESULTS
0,'00000'
10,'00000'
1,'11111'
11,'11111'
2,'22222'
12,'22222'
3,'33333'
13,'33333'
4,'44444'
14,'44444'
5,'55555'
6,'66666'
7,'77777'
8,'88888'
9,'99999'
====
---- QUERY
# Row is too big to process in sort or analytic.
SELECT id, int_col, substring(bigstr, 1, 10), substring(bigstr, 9999999, 1), rank
FROM (
SELECT id, int_col, bigstr, Rank() OVER (
ORDER BY int_col
) AS rank
FROM bigstrs
) a
ORDER BY id
---- CATCH
Row of size 9.54 MB could not be materialized by SORT_NODE (id=1). Increase the max_row_size query option (currently 512.00 KB) to process larger rows.
====
---- QUERY
# Sort and analytic should be able to process the large strings if we increase the row
# size.
set mem_limit="1gb";
set max_row_size=10m;
SELECT id, int_col, substring(bigstr, 1, 10), substring(bigstr, 9999999, 1), rank
FROM (
SELECT id, int_col, bigstr, Rank() OVER (
ORDER BY int_col
) AS rank
FROM bigstrs
) a
ORDER BY id
---- TYPES
int,int,string,string,bigint
---- RESULTS
0,0,'0000000000','0',1
1,1,'1111111111','1',4
2,2,'2222222222','2',7
3,3,'3333333333','3',10
4,4,'4444444444','4',13
5,5,'5555555555','5',16
6,6,'6666666666','6',18
7,7,'7777777777','7',20
8,8,'8888888888','8',22
9,9,'9999999999','9',24
10,0,'0000000000','0',1
11,1,'1111111111','1',4
12,2,'2222222222','2',7
13,3,'3333333333','3',10
14,4,'4444444444','4',13
15,5,'5555555555','5',16
16,6,'6666666666','6',18
17,7,'7777777777','7',20
18,8,'8888888888','8',22
19,9,'9999999999','9',24
20,0,'0000000000','0',1
21,1,'1111111111','1',4
22,2,'2222222222','2',7
23,3,'3333333333','3',10
24,4,'4444444444','4',13
====
---- QUERY
# IMPALA-9955,IMPALA-9957: Test aggregation on a table with both large rows and normal
# rows. TODO: use UNION ALL to generate the table after IMPALA-10040 is fixed.
set mem_limit="1gb";
create table bigstrs2 stored as parquet as
select *, repeat(string_col, 100000) as bigstr
from functional.alltypes
order by id
limit 10;
insert into bigstrs2
select *, repeat(string_col, 10000000) as bigstr
from functional.alltypes
order by id
limit 10;
====
---- QUERY
set mem_limit="1gb";
set max_row_size=10m;
select group_concat(string_col), length(bigstr) from bigstrs2
group by bigstr;
---- TYPES
string,int
---- RESULTS
'0',100000
'1',100000
'2',100000
'3',100000
'4',100000
'5',100000
'6',100000
'7',100000
'8',100000
'9',100000
'0',10000000
'1',10000000
'2',10000000
'3',10000000
'4',10000000
'5',10000000
'6',10000000
'7',10000000
'8',10000000
'9',10000000
====
---- QUERY
# IMPALA-9955,IMPALA-9957: Test aggregation on a table with random length of large rows.
# Can't check the results since it's a random table. The following queries will fail or
# crash the impalads when there are bugs in reservation.
set mem_limit="2gb";
create table bigstrs3 stored as parquet as
select *, repeat(uuid(), cast(random() * 200000 as int)) as bigstr
from functional.alltypes
limit 100;
# Length of uuid() is 36. So the max row size is 7,200,000.
set MAX_ROW_SIZE=8m;
create table my_str_group stored as parquet as
select group_concat(string_col) as ss, bigstr
from bigstrs3 group by bigstr;
create table my_cnt stored as parquet as
select count(*) as cnt, bigstr
from bigstrs3 group by bigstr;
====