mirror of
https://github.com/apache/impala.git
synced 2026-01-20 12:01:06 -05:00
Adds support for a "max_row_size" query option that instructs Impala to reserve enough memory to process rows of the specified size. For spilling operators, the planner reserves enough memory to process rows of this size. The advantage of this compared to simply specifying larger values for min_spillable_buffer_size and default_spillable_buffer_size is that operators may be able to handler larger rows without increasing the size of all their buffers. The default value is 512KB. I picked that number because it doesn't increase minimum reservations *too* much even with smaller buffers like 64kb but should be large enough for almost all reasonable workloads. This is implemented in the aggs and joins using the variable page size support added to BufferedTupleStream in an earlier commit. The synopsis is that each stream requires reservation for one default-sized page per read and write iterator, and temporarily requires reservation for a max-sized page when reading or writing larger pages. The max-sized write reservation is released immediately after the row is appended and the max-size read reservation is released after advancing to the next row. The sorter and analytic simply use max-sized buffers for all pages in the stream. Testing: Updated existing planner tests to reflect default max_row_size. Added new planner tests to test the effect of the query option. Added "set" test to check validation of query option. Added end-to-end tests exercising spilling operators with large rows with and without spilling induced by SET_DENY_RESERVATION_PROBABILITY. Change-Id: Ic70f6dddbcef124bb4b329ffa2e42a74a1826570 Reviewed-on: http://gerrit.cloudera.org:8080/7629 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Impala Public Jenkins
294 lines
6.6 KiB
Plaintext
294 lines
6.6 KiB
Plaintext
====
|
|
---- QUERY
|
|
set
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
|
|
'ABORT_ON_ERROR','0'
|
|
'ALLOW_UNSUPPORTED_FORMATS','0'
|
|
'BATCH_SIZE','0'
|
|
'DEBUG_ACTION',''
|
|
'DEFAULT_ORDER_BY_LIMIT','-1'
|
|
'DISABLE_CACHED_READS','0'
|
|
'DISABLE_CODEGEN','0'
|
|
'DISABLE_OUTERMOST_TOPN','0'
|
|
'EXPLAIN_LEVEL','1'
|
|
'HBASE_CACHE_BLOCKS','0'
|
|
'HBASE_CACHING','0'
|
|
'MAX_ERRORS','100'
|
|
'MAX_IO_BUFFERS','0'
|
|
'MAX_SCAN_RANGE_LENGTH','0'
|
|
'MEM_LIMIT','0'
|
|
'NUM_NODES','0'
|
|
'NUM_SCANNER_THREADS','0'
|
|
'COMPRESSION_CODEC','NONE'
|
|
'PARQUET_FILE_SIZE','0'
|
|
'REQUEST_POOL',''
|
|
'RESERVATION_REQUEST_TIMEOUT','0'
|
|
'RM_INITIAL_MEM','0'
|
|
'SYNC_DDL','0'
|
|
'V_CPU_CORES','0'
|
|
---- TYPES
|
|
STRING, STRING
|
|
====
|
|
---- QUERY
|
|
set explain_level=3;
|
|
set;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
|
|
'ABORT_ON_ERROR','0'
|
|
'ALLOW_UNSUPPORTED_FORMATS','0'
|
|
'BATCH_SIZE','0'
|
|
'DEBUG_ACTION',''
|
|
'DEFAULT_ORDER_BY_LIMIT','-1'
|
|
'DISABLE_CACHED_READS','0'
|
|
'DISABLE_CODEGEN','0'
|
|
'DISABLE_OUTERMOST_TOPN','0'
|
|
'EXPLAIN_LEVEL','3'
|
|
'HBASE_CACHE_BLOCKS','0'
|
|
'HBASE_CACHING','0'
|
|
'MAX_ERRORS','100'
|
|
'MAX_IO_BUFFERS','0'
|
|
'MAX_SCAN_RANGE_LENGTH','0'
|
|
'MEM_LIMIT','0'
|
|
'NUM_NODES','0'
|
|
'NUM_SCANNER_THREADS','0'
|
|
'COMPRESSION_CODEC','NONE'
|
|
'PARQUET_FILE_SIZE','0'
|
|
'REQUEST_POOL',''
|
|
'RESERVATION_REQUEST_TIMEOUT','0'
|
|
'RM_INITIAL_MEM','0'
|
|
'SYNC_DDL','0'
|
|
'V_CPU_CORES','0'
|
|
---- TYPES
|
|
STRING, STRING
|
|
====
|
|
---- QUERY
|
|
set explain_level='0';
|
|
set;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
|
|
'ABORT_ON_ERROR','0'
|
|
'ALLOW_UNSUPPORTED_FORMATS','0'
|
|
'BATCH_SIZE','0'
|
|
'DEBUG_ACTION',''
|
|
'DEFAULT_ORDER_BY_LIMIT','-1'
|
|
'DISABLE_CACHED_READS','0'
|
|
'DISABLE_CODEGEN','0'
|
|
'DISABLE_OUTERMOST_TOPN','0'
|
|
'EXPLAIN_LEVEL','0'
|
|
'HBASE_CACHE_BLOCKS','0'
|
|
'HBASE_CACHING','0'
|
|
'MAX_ERRORS','100'
|
|
'MAX_IO_BUFFERS','0'
|
|
'MAX_SCAN_RANGE_LENGTH','0'
|
|
'MEM_LIMIT','0'
|
|
'NUM_NODES','0'
|
|
'NUM_SCANNER_THREADS','0'
|
|
'COMPRESSION_CODEC','NONE'
|
|
'PARQUET_FILE_SIZE','0'
|
|
'REQUEST_POOL',''
|
|
'RESERVATION_REQUEST_TIMEOUT','0'
|
|
'RM_INITIAL_MEM','0'
|
|
'SYNC_DDL','0'
|
|
'V_CPU_CORES','0'
|
|
---- TYPES
|
|
STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1906: Test that SET changes PARQUET_FILE_SIZE only if it's less than 2GB.
|
|
set parquet_file_size='1.5g';
|
|
set;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
|
|
'ABORT_ON_ERROR','0'
|
|
'ALLOW_UNSUPPORTED_FORMATS','0'
|
|
'BATCH_SIZE','0'
|
|
'DEBUG_ACTION',''
|
|
'DEFAULT_ORDER_BY_LIMIT','-1'
|
|
'DISABLE_CACHED_READS','0'
|
|
'DISABLE_CODEGEN','0'
|
|
'DISABLE_OUTERMOST_TOPN','0'
|
|
'EXPLAIN_LEVEL','1'
|
|
'HBASE_CACHE_BLOCKS','0'
|
|
'HBASE_CACHING','0'
|
|
'MAX_ERRORS','100'
|
|
'MAX_IO_BUFFERS','0'
|
|
'MAX_SCAN_RANGE_LENGTH','0'
|
|
'MEM_LIMIT','0'
|
|
'NUM_NODES','0'
|
|
'NUM_SCANNER_THREADS','0'
|
|
'COMPRESSION_CODEC','NONE'
|
|
'PARQUET_FILE_SIZE','1610612736'
|
|
'REQUEST_POOL',''
|
|
'RESERVATION_REQUEST_TIMEOUT','0'
|
|
'RM_INITIAL_MEM','0'
|
|
'SYNC_DDL','0'
|
|
'V_CPU_CORES','0'
|
|
---- TYPES
|
|
STRING, STRING
|
|
====
|
|
---- QUERY
|
|
set parquet_file_size='2g'
|
|
---- CATCH
|
|
The PARQUET_FILE_SIZE query option must be less than 2GB.
|
|
====
|
|
---- QUERY
|
|
set foo=bar
|
|
---- CATCH
|
|
Invalid query option: foo
|
|
====
|
|
---- QUERY
|
|
set parquet_compression_codec=bar
|
|
---- CATCH
|
|
Invalid compression codec: bar
|
|
====
|
|
---- QUERY
|
|
# Test that SET actually does change the mem_limit.
|
|
# First, show mem_limit is not hit.
|
|
select 1
|
|
---- RESULTS
|
|
1
|
|
====
|
|
---- QUERY
|
|
# Set mem_limit really small so that queries will fail.
|
|
set mem_limit=1;
|
|
select count(string_col) from functional.alltypestiny
|
|
---- CATCH
|
|
Memory limit exceeded
|
|
====
|
|
---- QUERY
|
|
# Set mem_limit back to unlimited and query should succeed again.
|
|
set mem_limit=0;
|
|
select count(string_col) from functional.alltypestiny
|
|
---- RESULTS
|
|
8
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3334: 'optimize_partition_key_scans' is a boolean query option
|
|
set explain_level=0;
|
|
set optimize_partition_key_scans=true;
|
|
explain select min(month), max(year), ndv(day) from functional.alltypesagg;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'01:AGGREGATE [FINALIZE]'
|
|
'00:UNION'
|
|
' constant-operands=11'
|
|
====
|
|
---- QUERY
|
|
set explain_level=0;
|
|
set optimize_partition_key_scans=1;
|
|
explain select min(month), max(year), ndv(day) from functional.alltypesagg;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'01:AGGREGATE [FINALIZE]'
|
|
'00:UNION'
|
|
' constant-operands=11'
|
|
====
|
|
---- QUERY
|
|
set explain_level=0;
|
|
set optimize_partition_key_scans=false;
|
|
explain select min(month), max(year), ndv(day) from functional.alltypesagg;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'03:AGGREGATE [FINALIZE]'
|
|
'02:EXCHANGE [UNPARTITIONED]'
|
|
'01:AGGREGATE'
|
|
'00:SCAN HDFS [functional.alltypesagg]'
|
|
====
|
|
---- QUERY
|
|
set explain_level=0;
|
|
set optimize_partition_key_scans=0;
|
|
explain select min(month), max(year), ndv(day) from functional.alltypesagg;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'03:AGGREGATE [FINALIZE]'
|
|
'02:EXCHANGE [UNPARTITIONED]'
|
|
'01:AGGREGATE'
|
|
'00:SCAN HDFS [functional.alltypesagg]'
|
|
====
|
|
---- QUERY
|
|
set explain_level=0;
|
|
set disable_streaming_preaggregations=false;
|
|
explain select count(distinct double_col) from functional.alltypesagg;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'06:AGGREGATE [FINALIZE]'
|
|
'05:EXCHANGE [UNPARTITIONED]'
|
|
'02:AGGREGATE'
|
|
'04:AGGREGATE'
|
|
'03:EXCHANGE [HASH(double_col)]'
|
|
'01:AGGREGATE [STREAMING]'
|
|
'00:SCAN HDFS [functional.alltypesagg]'
|
|
====
|
|
---- QUERY
|
|
set explain_level=0;
|
|
set disable_streaming_preaggregations=0;
|
|
explain select count(distinct double_col) from functional.alltypesagg;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'06:AGGREGATE [FINALIZE]'
|
|
'05:EXCHANGE [UNPARTITIONED]'
|
|
'02:AGGREGATE'
|
|
'04:AGGREGATE'
|
|
'03:EXCHANGE [HASH(double_col)]'
|
|
'01:AGGREGATE [STREAMING]'
|
|
'00:SCAN HDFS [functional.alltypesagg]'
|
|
====
|
|
---- QUERY
|
|
set explain_level=0;
|
|
set disable_streaming_preaggregations=true;
|
|
explain select count(distinct double_col) from functional.alltypesagg;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'06:AGGREGATE [FINALIZE]'
|
|
'05:EXCHANGE [UNPARTITIONED]'
|
|
'02:AGGREGATE'
|
|
'04:AGGREGATE'
|
|
'03:EXCHANGE [HASH(double_col)]'
|
|
'01:AGGREGATE'
|
|
'00:SCAN HDFS [functional.alltypesagg]'
|
|
====
|
|
---- QUERY
|
|
set explain_level=0;
|
|
set disable_streaming_preaggregations=1;
|
|
explain select count(distinct double_col) from functional.alltypesagg;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'06:AGGREGATE [FINALIZE]'
|
|
'05:EXCHANGE [UNPARTITIONED]'
|
|
'02:AGGREGATE'
|
|
'04:AGGREGATE'
|
|
'03:EXCHANGE [HASH(double_col)]'
|
|
'01:AGGREGATE'
|
|
'00:SCAN HDFS [functional.alltypesagg]'
|
|
====
|
|
---- QUERY
|
|
# IMPALA-5591: This shouldn't throw an error.
|
|
set scratch_limit=-1;
|
|
====
|
|
---- QUERY
|
|
# Power of two max_row_size.
|
|
set max_row_size=8m;
|
|
set;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'MAX_ROW_SIZE','8388608'
|
|
---- TYPES
|
|
STRING,STRING
|
|
====
|
|
---- QUERY
|
|
# Non power of two max_row_size.
|
|
set max_row_size=12345;
|
|
set;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'MAX_ROW_SIZE','12345'
|
|
---- TYPES
|
|
STRING,STRING
|
|
====
|
|
---- QUERY
|
|
set max_row_size=8m;
|
|
====
|
|
---- QUERY
|
|
set max_row_size=-1;
|
|
---- CATCH
|
|
Max row size must be a positive number of bytes: -1
|
|
====
|
|
---- QUERY
|
|
set max_row_size=0;
|
|
---- CATCH
|
|
Max row size must be a positive number of bytes: 0
|
|
====
|