mirror of
https://github.com/apache/impala.git
synced 2026-01-01 09:00:42 -05:00
This patch makes the ownership of the memory backing the tuple pointers of a RowBatch dependent on whether the legacy joins and aggs are enabled:

- By default, the memory is malloc'd and owned by the RowBatch: If enable_partitioned_hash_join=true and enable_partitioned_aggregation=true, then the memory is owned by the RowBatch and is freed upon its destruction. This mode is more performant, especially with SubplanNodes in the ExecNode tree, because the tuple pointers are not transferred and do not have to be re-created in every Reset().
- Memory is allocated from MemPool: Otherwise, the memory is allocated from the RowBatch's tuple pool. As a result, the pointer memory is transferred just like tuple data, and must be re-created in Reset(). This mode is required for the legacy join and agg, which rely on the tuple pointers being allocated from the RowBatch's tuple pool so they can acquire ownership of the tuple pointers.

Performance impact for nested types: Initial cluster runs and profiling on nested TPCH identified excessive malloc/frees as a major performance bottleneck. This change paves the way for further optimizations which yielded a 2x improvement in response time for most nested TPCH queries.

Change-Id: I4ac58b18058ce46b4db89fbe117b0bcad19e9ee7
Reviewed-on: http://gerrit.cloudera.org:8080/807
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins
178 lines
3.6 KiB
Plaintext
178 lines
3.6 KiB
Plaintext
====
|
|
---- QUERY
|
|
set
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
|
|
'ABORT_ON_ERROR','0'
|
|
'ALLOW_UNSUPPORTED_FORMATS','0'
|
|
'BATCH_SIZE','0'
|
|
'DEBUG_ACTION',''
|
|
'DEFAULT_ORDER_BY_LIMIT','-1'
|
|
'DISABLE_CACHED_READS','0'
|
|
'DISABLE_CODEGEN','0'
|
|
'DISABLE_OUTERMOST_TOPN','0'
|
|
'EXPLAIN_LEVEL','1'
|
|
'HBASE_CACHE_BLOCKS','0'
|
|
'HBASE_CACHING','0'
|
|
'MAX_ERRORS','0'
|
|
'MAX_IO_BUFFERS','0'
|
|
'MAX_SCAN_RANGE_LENGTH','0'
|
|
'MEM_LIMIT','0'
|
|
'NUM_NODES','0'
|
|
'NUM_SCANNER_THREADS','0'
|
|
'COMPRESSION_CODEC','NONE'
|
|
'PARQUET_FILE_SIZE','0'
|
|
'REQUEST_POOL',''
|
|
'RESERVATION_REQUEST_TIMEOUT','0'
|
|
'RM_INITIAL_MEM','0'
|
|
'SYNC_DDL','0'
|
|
'V_CPU_CORES','0'
|
|
---- TYPES
|
|
STRING, STRING
|
|
====
|
|
---- QUERY
|
|
set explain_level=3
|
|
====
|
|
---- QUERY
|
|
set
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
|
|
'ABORT_ON_ERROR','0'
|
|
'ALLOW_UNSUPPORTED_FORMATS','0'
|
|
'BATCH_SIZE','0'
|
|
'DEBUG_ACTION',''
|
|
'DEFAULT_ORDER_BY_LIMIT','-1'
|
|
'DISABLE_CACHED_READS','0'
|
|
'DISABLE_CODEGEN','0'
|
|
'DISABLE_OUTERMOST_TOPN','0'
|
|
'EXPLAIN_LEVEL','3'
|
|
'HBASE_CACHE_BLOCKS','0'
|
|
'HBASE_CACHING','0'
|
|
'MAX_ERRORS','0'
|
|
'MAX_IO_BUFFERS','0'
|
|
'MAX_SCAN_RANGE_LENGTH','0'
|
|
'MEM_LIMIT','0'
|
|
'NUM_NODES','0'
|
|
'NUM_SCANNER_THREADS','0'
|
|
'COMPRESSION_CODEC','NONE'
|
|
'PARQUET_FILE_SIZE','0'
|
|
'REQUEST_POOL',''
|
|
'RESERVATION_REQUEST_TIMEOUT','0'
|
|
'RM_INITIAL_MEM','0'
|
|
'SYNC_DDL','0'
|
|
'V_CPU_CORES','0'
|
|
---- TYPES
|
|
STRING, STRING
|
|
====
|
|
---- QUERY
|
|
set explain_level='0'
|
|
====
|
|
---- QUERY
|
|
set
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
|
|
'ABORT_ON_ERROR','0'
|
|
'ALLOW_UNSUPPORTED_FORMATS','0'
|
|
'BATCH_SIZE','0'
|
|
'DEBUG_ACTION',''
|
|
'DEFAULT_ORDER_BY_LIMIT','-1'
|
|
'DISABLE_CACHED_READS','0'
|
|
'DISABLE_CODEGEN','0'
|
|
'DISABLE_OUTERMOST_TOPN','0'
|
|
'EXPLAIN_LEVEL','0'
|
|
'HBASE_CACHE_BLOCKS','0'
|
|
'HBASE_CACHING','0'
|
|
'MAX_ERRORS','0'
|
|
'MAX_IO_BUFFERS','0'
|
|
'MAX_SCAN_RANGE_LENGTH','0'
|
|
'MEM_LIMIT','0'
|
|
'NUM_NODES','0'
|
|
'NUM_SCANNER_THREADS','0'
|
|
'COMPRESSION_CODEC','NONE'
|
|
'PARQUET_FILE_SIZE','0'
|
|
'REQUEST_POOL',''
|
|
'RESERVATION_REQUEST_TIMEOUT','0'
|
|
'RM_INITIAL_MEM','0'
|
|
'SYNC_DDL','0'
|
|
'V_CPU_CORES','0'
|
|
---- TYPES
|
|
STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1906: Test that SET changes PARQUET_FILE_SIZE only if it's less than 2GB.
|
|
set parquet_file_size='1.5g'
|
|
====
|
|
---- QUERY
|
|
set
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
|
|
'ABORT_ON_ERROR','0'
|
|
'ALLOW_UNSUPPORTED_FORMATS','0'
|
|
'BATCH_SIZE','0'
|
|
'DEBUG_ACTION',''
|
|
'DEFAULT_ORDER_BY_LIMIT','-1'
|
|
'DISABLE_CACHED_READS','0'
|
|
'DISABLE_CODEGEN','0'
|
|
'DISABLE_OUTERMOST_TOPN','0'
|
|
'EXPLAIN_LEVEL','0'
|
|
'HBASE_CACHE_BLOCKS','0'
|
|
'HBASE_CACHING','0'
|
|
'MAX_ERRORS','0'
|
|
'MAX_IO_BUFFERS','0'
|
|
'MAX_SCAN_RANGE_LENGTH','0'
|
|
'MEM_LIMIT','0'
|
|
'NUM_NODES','0'
|
|
'NUM_SCANNER_THREADS','0'
|
|
'COMPRESSION_CODEC','NONE'
|
|
'PARQUET_FILE_SIZE','1610612736'
|
|
'REQUEST_POOL',''
|
|
'RESERVATION_REQUEST_TIMEOUT','0'
|
|
'RM_INITIAL_MEM','0'
|
|
'SYNC_DDL','0'
|
|
'V_CPU_CORES','0'
|
|
---- TYPES
|
|
STRING, STRING
|
|
====
|
|
---- QUERY
|
|
set parquet_file_size='2g'
|
|
---- CATCH
|
|
The PARQUET_FILE_SIZE query option must be less than 2GB.
|
|
====
|
|
---- QUERY
|
|
set foo=bar
|
|
---- CATCH
|
|
Ignoring invalid configuration option: foo
|
|
====
|
|
---- QUERY
|
|
set parquet_compression_codec=bar
|
|
---- CATCH
|
|
Invalid compression codec: bar
|
|
====
|
|
---- QUERY
|
|
# Test that SET actually does change the mem_limit.
|
|
# First, show mem_limit is not hit.
|
|
select 1
|
|
---- RESULTS
|
|
1
|
|
====
|
|
---- QUERY
|
|
# Set mem_limit really small so that queries will fail.
|
|
set mem_limit=1
|
|
====
|
|
---- QUERY
|
|
select count(string_col) from functional.alltypestiny
|
|
---- CATCH
|
|
Memory limit exceeded
|
|
====
|
|
---- QUERY
|
|
# Set mem_limit back to unlimited and query should succeed again.
|
|
set mem_limit=0
|
|
====
|
|
---- QUERY
|
|
select count(string_col) from functional.alltypestiny
|
|
---- RESULTS
|
|
8
|
|
---- TYPES
|
|
BIGINT
|
|
====
|