Files
impala/testdata/workloads/functional-query/queries/QueryTest/set.test
Tim Armstrong 95ed4434f2 IMPALA-3202,IMPALA-2079: rework scratch file I/O
Refactor BufferedBlockMgr/TmpFileMgr to push more I/O logic into
TmpFileMgr, in anticipation of it being shared with BufferPool.
TmpFileMgr now handles:
* Scratch space allocation and recycling
* Read and write I/O

The interface is also greatly changed so that it is built around Write()
and Read() calls, abstracting away the details of temporary file
allocation from clients. This means the TmpFileMgr::File class can
be hidden from clients.

Write error recovery:
Also implement write error recovery in TmpFileMgr.

If an error occurs while writing to scratch and we have multiple
scratch directories, we will try one of the other directories
before cancelling the query. File-level blacklisting is used to
prevent excessive repeated attempts to resize a scratch file during
a single query. Device-level blacklisting is not implemented because
it is problematic to permanently take a scratch directory out of use.

To reduce the number of error paths, all I/O errors are now handled
asynchronously. Previously errors creating or extending the file were
returned synchronously from WriteUnpinnedBlock(). This required
modifying DiskIoMgr to create the file if not present when opened.

Also set the default max_errors value in the thrift definition file,
so that it is in effect for backend tests.

Future Work:
* Support for recycling variable-length scratch file ranges. I omitted
  this to avoid making the patch even larger.

Testing:
Updated BufferedBlockMgr unit test to reflect changes in behaviour:
* Scratch space is no longer permanently associated with a block, and
  is remapped every time a new block is written to disk.
* Files are now blacklisted - updated existing tests and enabled the
  disabled blacklisting test.

Added some basic testing of recycling of scratch file ranges in
the TmpFileMgr unit test.

I also manually tested the code in two ways. First by removing permissions
for /tmp/impala-scratch and ensuring that a spilling query fails cleanly.
Second, by creating a tiny ramdisk (16M) and running with two scratch
directories: one on /tmp and one on the tiny ramdisk. When spilling, an
out of space error is encountered for the tiny ramdisk and impala spills
the remaining data (72M) to /tmp.

Change-Id: I8c9c587df006d2f09d72dd636adafbd295fcdc17
Reviewed-on: http://gerrit.cloudera.org:8080/5141
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Impala Public Jenkins
2017-01-05 02:26:24 +00:00

259 lines
6.0 KiB
Plaintext

====
---- QUERY
# Run a bare SET and check the query options it lists. The rows below are
# matched with VERIFY_IS_SUBSET, so options not listed here are not checked.
# NOTE(review): the expected values are presumably the defaults - confirm.
set
---- RESULTS: VERIFY_IS_SUBSET
'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
'ABORT_ON_ERROR','0'
'ALLOW_UNSUPPORTED_FORMATS','0'
'BATCH_SIZE','0'
'DEBUG_ACTION',''
'DEFAULT_ORDER_BY_LIMIT','-1'
'DISABLE_CACHED_READS','0'
'DISABLE_CODEGEN','0'
'DISABLE_OUTERMOST_TOPN','0'
'EXPLAIN_LEVEL','1'
'HBASE_CACHE_BLOCKS','0'
'HBASE_CACHING','0'
'MAX_ERRORS','100'
'MAX_IO_BUFFERS','0'
'MAX_SCAN_RANGE_LENGTH','0'
'MEM_LIMIT','0'
'NUM_NODES','0'
'NUM_SCANNER_THREADS','0'
'COMPRESSION_CODEC','NONE'
'PARQUET_FILE_SIZE','0'
'REQUEST_POOL',''
'RESERVATION_REQUEST_TIMEOUT','0'
'RM_INITIAL_MEM','0'
'SYNC_DDL','0'
'V_CPU_CORES','0'
---- TYPES
STRING, STRING
====
---- QUERY
# Change EXPLAIN_LEVEL to 3 with an unquoted value, then list the options
# again. The EXPLAIN_LEVEL row is expected to read '3' and every other listed
# row keeps the value expected by the plain SET test at the top of this file.
set explain_level=3;
set;
---- RESULTS: VERIFY_IS_SUBSET
'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
'ABORT_ON_ERROR','0'
'ALLOW_UNSUPPORTED_FORMATS','0'
'BATCH_SIZE','0'
'DEBUG_ACTION',''
'DEFAULT_ORDER_BY_LIMIT','-1'
'DISABLE_CACHED_READS','0'
'DISABLE_CODEGEN','0'
'DISABLE_OUTERMOST_TOPN','0'
'EXPLAIN_LEVEL','3'
'HBASE_CACHE_BLOCKS','0'
'HBASE_CACHING','0'
'MAX_ERRORS','100'
'MAX_IO_BUFFERS','0'
'MAX_SCAN_RANGE_LENGTH','0'
'MEM_LIMIT','0'
'NUM_NODES','0'
'NUM_SCANNER_THREADS','0'
'COMPRESSION_CODEC','NONE'
'PARQUET_FILE_SIZE','0'
'REQUEST_POOL',''
'RESERVATION_REQUEST_TIMEOUT','0'
'RM_INITIAL_MEM','0'
'SYNC_DDL','0'
'V_CPU_CORES','0'
---- TYPES
STRING, STRING
====
---- QUERY
# Same check with a single-quoted value: after set explain_level='0' the
# EXPLAIN_LEVEL row is expected to read '0'. The other listed rows are the
# same as in the plain SET test at the top of this file.
set explain_level='0';
set;
---- RESULTS: VERIFY_IS_SUBSET
'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
'ABORT_ON_ERROR','0'
'ALLOW_UNSUPPORTED_FORMATS','0'
'BATCH_SIZE','0'
'DEBUG_ACTION',''
'DEFAULT_ORDER_BY_LIMIT','-1'
'DISABLE_CACHED_READS','0'
'DISABLE_CODEGEN','0'
'DISABLE_OUTERMOST_TOPN','0'
'EXPLAIN_LEVEL','0'
'HBASE_CACHE_BLOCKS','0'
'HBASE_CACHING','0'
'MAX_ERRORS','100'
'MAX_IO_BUFFERS','0'
'MAX_SCAN_RANGE_LENGTH','0'
'MEM_LIMIT','0'
'NUM_NODES','0'
'NUM_SCANNER_THREADS','0'
'COMPRESSION_CODEC','NONE'
'PARQUET_FILE_SIZE','0'
'REQUEST_POOL',''
'RESERVATION_REQUEST_TIMEOUT','0'
'RM_INITIAL_MEM','0'
'SYNC_DDL','0'
'V_CPU_CORES','0'
---- TYPES
STRING, STRING
====
---- QUERY
# IMPALA-1906: Test that SET changes PARQUET_FILE_SIZE only if it's less than 2GB.
# The value '1.5g' is expected to be reported as 1610612736, which is
# 1.5 * 1024 * 1024 * 1024.
# EXPLAIN_LEVEL is expected to read '1' here although the preceding test
# section set it to '0'.
# NOTE(review): presumably options set in one test section do not carry over
# to the next - confirm against the test harness.
set parquet_file_size='1.5g';
set;
---- RESULTS: VERIFY_IS_SUBSET
'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
'ABORT_ON_ERROR','0'
'ALLOW_UNSUPPORTED_FORMATS','0'
'BATCH_SIZE','0'
'DEBUG_ACTION',''
'DEFAULT_ORDER_BY_LIMIT','-1'
'DISABLE_CACHED_READS','0'
'DISABLE_CODEGEN','0'
'DISABLE_OUTERMOST_TOPN','0'
'EXPLAIN_LEVEL','1'
'HBASE_CACHE_BLOCKS','0'
'HBASE_CACHING','0'
'MAX_ERRORS','100'
'MAX_IO_BUFFERS','0'
'MAX_SCAN_RANGE_LENGTH','0'
'MEM_LIMIT','0'
'NUM_NODES','0'
'NUM_SCANNER_THREADS','0'
'COMPRESSION_CODEC','NONE'
'PARQUET_FILE_SIZE','1610612736'
'REQUEST_POOL',''
'RESERVATION_REQUEST_TIMEOUT','0'
'RM_INITIAL_MEM','0'
'SYNC_DDL','0'
'V_CPU_CORES','0'
---- TYPES
STRING, STRING
====
---- QUERY
# Boundary case for the PARQUET_FILE_SIZE limit: a value of exactly '2g' is
# expected to be rejected with the error text in the CATCH section.
set parquet_file_size='2g'
---- CATCH
The PARQUET_FILE_SIZE query option must be less than 2GB.
====
---- QUERY
# Setting an option name that does not exist is expected to fail, and the
# error names the offending option.
set foo=bar
---- CATCH
Invalid query option: foo
====
---- QUERY
# Here the option name is accepted but the value is not: the expected error
# complains about the codec 'bar' rather than about an invalid query option.
set parquet_compression_codec=bar
---- CATCH
Invalid compression codec: bar
====
---- QUERY
# Test that SET actually does change the mem_limit.
# First, show mem_limit is not hit.
# This is the baseline step: no mem_limit is set in this section and a trivial
# query is expected to succeed. The two test sections that follow lower the
# limit and then restore it.
select 1
---- RESULTS
1
====
---- QUERY
# Set mem_limit really small so that queries will fail.
# The query after the SET is expected to fail with an error matching the text
# in the CATCH section.
# NOTE(review): the value 1 is presumably a byte count - confirm.
set mem_limit=1;
select count(string_col) from functional.alltypestiny
---- CATCH
Memory limit exceeded
====
---- QUERY
# Set mem_limit back to unlimited and query should succeed again.
# This runs the same count query as the preceding failing section. With
# mem_limit=0 it is expected to return 8 as a single BIGINT value.
set mem_limit=0;
select count(string_col) from functional.alltypestiny
---- RESULTS
8
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3334: 'optimize_partition_key_scans' is a boolean query option
# Here it is enabled with the literal true. The plan is expected to contain
# the 01:AGGREGATE [FINALIZE], 00:UNION and constant-operands=11 lines.
# NOTE(review): explain_level=0 presumably keeps the plan text to the terse
# form matched below - confirm.
set explain_level=0;
set optimize_partition_key_scans=true;
explain select min(month), max(year), ndv(day) from functional.alltypesagg;
---- RESULTS: VERIFY_IS_SUBSET
'01:AGGREGATE [FINALIZE]'
'00:UNION'
' constant-operands=11'
====
---- QUERY
# Enables optimize_partition_key_scans with the numeric value 1. The expected
# plan lines are the same as when the option is set to true: a final
# aggregate over a UNION with 11 constant operands.
set explain_level=0;
set optimize_partition_key_scans=1;
explain select min(month), max(year), ndv(day) from functional.alltypesagg;
---- RESULTS: VERIFY_IS_SUBSET
'01:AGGREGATE [FINALIZE]'
'00:UNION'
' constant-operands=11'
====
---- QUERY
# Disables optimize_partition_key_scans with the literal false. The expected
# plan lines are a SCAN HDFS of functional.alltypesagg, two AGGREGATE nodes
# and an EXCHANGE. No UNION line is listed.
set explain_level=0;
set optimize_partition_key_scans=false;
explain select min(month), max(year), ndv(day) from functional.alltypesagg;
---- RESULTS: VERIFY_IS_SUBSET
'03:AGGREGATE [FINALIZE]'
'02:EXCHANGE [UNPARTITIONED]'
'01:AGGREGATE'
'00:SCAN HDFS [functional.alltypesagg]'
====
---- QUERY
# Disables optimize_partition_key_scans with the numeric value 0. The expected
# plan lines are the same as when the option is set to false: a SCAN HDFS of
# functional.alltypesagg, two AGGREGATE nodes and an EXCHANGE.
set explain_level=0;
set optimize_partition_key_scans=0;
explain select min(month), max(year), ndv(day) from functional.alltypesagg;
---- RESULTS: VERIFY_IS_SUBSET
'03:AGGREGATE [FINALIZE]'
'02:EXCHANGE [UNPARTITIONED]'
'01:AGGREGATE'
'00:SCAN HDFS [functional.alltypesagg]'
====
---- QUERY
# Sets disable_streaming_preaggregations to the literal false. Node 01 of the
# expected plan is listed as AGGREGATE [STREAMING].
set explain_level=0;
set disable_streaming_preaggregations=false;
explain select count(distinct double_col) from functional.alltypesagg;
---- RESULTS: VERIFY_IS_SUBSET
'06:AGGREGATE [FINALIZE]'
'05:EXCHANGE [UNPARTITIONED]'
'02:AGGREGATE'
'04:AGGREGATE'
'03:EXCHANGE [HASH(double_col)]'
'01:AGGREGATE [STREAMING]'
'00:SCAN HDFS [functional.alltypesagg]'
====
---- QUERY
# Sets disable_streaming_preaggregations to the numeric value 0. The expected
# plan lines are the same as when the option is set to false, with node 01
# listed as AGGREGATE [STREAMING].
set explain_level=0;
set disable_streaming_preaggregations=0;
explain select count(distinct double_col) from functional.alltypesagg;
---- RESULTS: VERIFY_IS_SUBSET
'06:AGGREGATE [FINALIZE]'
'05:EXCHANGE [UNPARTITIONED]'
'02:AGGREGATE'
'04:AGGREGATE'
'03:EXCHANGE [HASH(double_col)]'
'01:AGGREGATE [STREAMING]'
'00:SCAN HDFS [functional.alltypesagg]'
====
---- QUERY
# Sets disable_streaming_preaggregations to the literal true. Node 01 of the
# expected plan is listed as a plain AGGREGATE, without the [STREAMING] tag.
# The other expected plan lines are the same as when the option is false.
set explain_level=0;
set disable_streaming_preaggregations=true;
explain select count(distinct double_col) from functional.alltypesagg;
---- RESULTS: VERIFY_IS_SUBSET
'06:AGGREGATE [FINALIZE]'
'05:EXCHANGE [UNPARTITIONED]'
'02:AGGREGATE'
'04:AGGREGATE'
'03:EXCHANGE [HASH(double_col)]'
'01:AGGREGATE'
'00:SCAN HDFS [functional.alltypesagg]'
====
---- QUERY
# Sets disable_streaming_preaggregations to the numeric value 1. The expected
# plan lines are the same as when the option is set to true, with node 01
# listed as a plain AGGREGATE without the [STREAMING] tag.
set explain_level=0;
set disable_streaming_preaggregations=1;
explain select count(distinct double_col) from functional.alltypesagg;
---- RESULTS: VERIFY_IS_SUBSET
'06:AGGREGATE [FINALIZE]'
'05:EXCHANGE [UNPARTITIONED]'
'02:AGGREGATE'
'04:AGGREGATE'
'03:EXCHANGE [HASH(double_col)]'
'01:AGGREGATE'
'00:SCAN HDFS [functional.alltypesagg]'
====