The minimum reservation requirement for a spillable operator is
((min_buffers - 2) * default_buffer_size) + 2 * max_row_size. In the min
reservation, we only reserve space for two large pages: one for reading
and one for writing. However, to make the non-streaming
GroupingAggregator work correctly, we have to manage these extra
reservations carefully so that it won't run out of the min reservation
when it actually needs to spill a large page, or when it actually needs
to read a large page.

Specifically, how the large write page reservation is managed depends on
whether needs_serialize is true or false:
- If the aggregator needs to serialize the intermediate results when
  spilling a partition, we have to save a large page worth of
  reservation for the serialize stream, in case it needs to write large
  rows. This space can be restored once all the partitions are spilled,
  because the serialize stream is not needed again until we
  build/repartition a spilled partition and thus have pinned partitions
  again. If the large write page reservation is used, we save it back
  whenever possible after we spill or close a partition.
- If the aggregator doesn't need the serialize stream at all, we can
  restore the large write page reservation whenever we fail to add a
  large row, before spilling any partitions. We reclaim it whenever
  possible after we spill or close a partition.

A special case is when we are processing a large row and it's the last
row in building/repartitioning a spilled partition: the large write page
reservation can be restored for it regardless of whether we need the
serialize stream, because the partitions will be read out after this, so
there is no need for spilling.

The large read page reservation is transferred to the spilled
BufferedTupleStream that we are reading when building/repartitioning a
spilled partition. The stream will restore some of it when reading a
large page, and reclaim it when the output row batch is reset. Note that
the stream is read in attach_on_read mode, so the large page is attached
to the row batch's buffers and only gets freed when the row batch is
reset.

Tests:
- Add tests in test_spilling_large_rows (test_spilling.py) with
  different row sizes to reproduce the issue.
- One test in test_spilling_no_debug_action became flaky after this
  patch. Revise the query to make the UDF allocate larger strings so it
  can consistently pass.
- Run CORE tests.

Change-Id: I3d9c3a2e7f0da60071b920dec979729e86459775
Reviewed-on: http://gerrit.cloudera.org:8080/16240
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
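As a worked example of the reservation formula above (using the usual
defaults of default_buffer_size = 2 MB and max_row_size = 512 KB, and a
hypothetical operator whose min_buffers is 16, chosen here only for
illustration):

  min reservation = ((16 - 2) * 2 MB) + (2 * 512 KB)
                  = 28 MB + 1 MB
                  = 29 MB

Only the 2 * max_row_size term (one large read page plus one large write
page) is reserved for large rows, which is why that slice of the
reservation has to be managed so carefully.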
====
---- QUERY
# Create a temporary table with 10MB strings for the following tests.
# Need to bump memory estimate to prevent running out of memory.
set mem_limit="1.2gb";
create table bigstrs stored as parquet as
select *, repeat(string_col, 10000000) as bigstr
from functional.alltypes
order by id
limit 25
---- RESULTS
'Inserted 25 row(s)'
====
---- QUERY
# Row is too big to process in agg.
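# Each bigstr is repeat(string_col, 10000000), i.e. 10,000,000 bytes
# (~9.54 MiB), well above the default max_row_size of 512 KB.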
select id, count(distinct bigstr)
from bigstrs
group by id
---- CATCH
Row of size 9.54 MB could not be materialized by AGGREGATION_NODE
====
---- QUERY
# Agg should be able to process the large strings if we increase the row size.
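# max_row_size=10m gives 10 MiB, just above the ~9.54 MiB rows, so each row
# fits in a single large page.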
set max_row_size=10m;
select id, count(distinct bigstr)
from bigstrs
group by id
order by id
---- TYPES
int,bigint
---- RESULTS
0,1
1,1
2,1
3,1
4,1
5,1
6,1
7,1
8,1
9,1
10,1
11,1
12,1
13,1
14,1
15,1
16,1
17,1
18,1
19,1
20,1
21,1
22,1
23,1
24,1
====
---- QUERY
# Row is too big to process in right side of hash join.
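# The build side must materialize the ~9.54 MiB bigstr values into the hash
# join builder's rows, again exceeding the default 512 KB max_row_size.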
set mem_limit="1gb";
select straight_join atp.id, bs.id, atp.string_col
from functional.alltypes atp
join bigstrs bs on repeat(atp.string_col, 10000) = substring(bs.bigstr, 5000000, 10000) and atp.id = bs.id
where atp.id < 100
---- CATCH
Row of size 9.54 MB could not be materialized by Hash Join Builder (join_node_id=2). Increase the max_row_size query option (currently 512.00 KB) to process larger rows.
====
---- QUERY
# Hash join should be able to process the large rows if we increase the row size.
set mem_limit="1gb";
set max_row_size=18m;
select straight_join atp.id, bs.id, atp.string_col
from functional.alltypes atp
join bigstrs bs on repeat(atp.string_col, 10000) = substring(bs.bigstr, 5000000, 10000) and atp.id = bs.id
where atp.id < 100
---- TYPES
int,int,string
---- RESULTS
0,0,'0'
1,1,'1'
2,2,'2'
3,3,'3'
4,4,'4'
5,5,'5'
6,6,'6'
7,7,'7'
8,8,'8'
9,9,'9'
10,10,'0'
11,11,'1'
12,12,'2'
13,13,'3'
14,14,'4'
15,15,'5'
16,16,'6'
17,17,'7'
18,18,'8'
19,19,'9'
20,20,'0'
21,21,'1'
22,22,'2'
23,23,'3'
24,24,'4'
====
---- QUERY
# Row is too big to process in sort.
select id, substr(bigstr, 1, 5)
from bigstrs
order by bigstr, id
---- CATCH
Row of size 9.54 MB could not be materialized by SORT_NODE (id=1). Increase the max_row_size query option (currently 512.00 KB) to process larger rows.
====
---- QUERY
# Sort should be able to process the large strings if we increase the row size.
set max_row_size=10m;
select id, substr(bigstr, 1, 5)
from bigstrs
where id < 15
order by bigstr, id
---- TYPES
int,string
---- RESULTS
0,'00000'
10,'00000'
1,'11111'
11,'11111'
2,'22222'
12,'22222'
3,'33333'
13,'33333'
4,'44444'
14,'44444'
5,'55555'
6,'66666'
7,'77777'
8,'88888'
9,'99999'
====
---- QUERY
# Row is too big to process in sort or analytic.
SELECT id, int_col, substring(bigstr, 1, 10), substring(bigstr, 9999999, 1), rank
FROM (
SELECT id, int_col, bigstr, Rank() OVER (
ORDER BY int_col
) AS rank
FROM bigstrs
) a
ORDER BY id
---- CATCH
Row of size 9.54 MB could not be materialized by SORT_NODE (id=1). Increase the max_row_size query option (currently 512.00 KB) to process larger rows.
====
---- QUERY
# Sort and analytic should be able to process the large strings if we increase
# the row size.
set mem_limit="1gb";
set max_row_size=10m;
SELECT id, int_col, substring(bigstr, 1, 10), substring(bigstr, 9999999, 1), rank
FROM (
SELECT id, int_col, bigstr, Rank() OVER (
ORDER BY int_col
) AS rank
FROM bigstrs
) a
ORDER BY id
---- TYPES
int,int,string,string,bigint
---- RESULTS
0,0,'0000000000','0',1
1,1,'1111111111','1',4
2,2,'2222222222','2',7
3,3,'3333333333','3',10
4,4,'4444444444','4',13
5,5,'5555555555','5',16
6,6,'6666666666','6',18
7,7,'7777777777','7',20
8,8,'8888888888','8',22
9,9,'9999999999','9',24
10,0,'0000000000','0',1
11,1,'1111111111','1',4
12,2,'2222222222','2',7
13,3,'3333333333','3',10
14,4,'4444444444','4',13
15,5,'5555555555','5',16
16,6,'6666666666','6',18
17,7,'7777777777','7',20
18,8,'8888888888','8',22
19,9,'9999999999','9',24
20,0,'0000000000','0',1
21,1,'1111111111','1',4
22,2,'2222222222','2',7
23,3,'3333333333','3',10
24,4,'4444444444','4',13
====
---- QUERY
# IMPALA-9955,IMPALA-9957: Test aggregation on a table with both large rows and normal
# rows. TODO: use UNION ALL to generate the table after IMPALA-10040 is fixed.
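# The CTAS produces rows with ~97.7 KB strings; the INSERT adds rows with
# ~9.54 MiB strings, so the aggregations below see both normal and large rows.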
set mem_limit="1gb";
create table bigstrs2 stored as parquet as
select *, repeat(string_col, 100000) as bigstr
from functional.alltypes
order by id
limit 10;
insert into bigstrs2
select *, repeat(string_col, 10000000) as bigstr
from functional.alltypes
order by id
limit 10;
====
---- QUERY
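# Group by the large bigstr column: each of the 20 groups contains a single
# row, but the grouping key itself can be ~9.54 MiB.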
set mem_limit="1gb";
set max_row_size=10m;
select group_concat(string_col), length(bigstr) from bigstrs2
group by bigstr;
---- TYPES
string,int
---- RESULTS
'0',100000
'1',100000
'2',100000
'3',100000
'4',100000
'5',100000
'6',100000
'7',100000
'8',100000
'9',100000
'0',10000000
'1',10000000
'2',10000000
'3',10000000
'4',10000000
'5',10000000
'6',10000000
'7',10000000
'8',10000000
'9',10000000
====
---- QUERY
# IMPALA-9955,IMPALA-9957: Test aggregation on a table with random length of large rows.
# Can't check the results since it's a random table. The following queries will fail or
# crash the impalads when there are bugs in reservation.
set mem_limit="2gb";
create table bigstrs3 stored as parquet as
select *, repeat(uuid(), cast(random() * 200000 as int)) as bigstr
from functional.alltypes
limit 100;
# Length of uuid() is 36, so the max bigstr size is 36 * 200,000 = 7,200,000
# bytes (~6.87 MiB), which fits under max_row_size=8m.
set MAX_ROW_SIZE=8m;
create table my_str_group stored as parquet as
select group_concat(string_col) as ss, bigstr
from bigstrs3 group by bigstr;
create table my_cnt stored as parquet as
select count(*) as cnt, bigstr
from bigstrs3 group by bigstr;
====