mirror of
https://github.com/apache/impala.git
synced 2026-01-31 09:00:19 -05:00
Result spooling has been relatively stable since it was introduced, and it has several benefits described in IMPALA-8656. This patch enables the result spooling (SPOOL_QUERY_RESULTS) query option by default. Furthermore, some tests need to be adjusted to account for result spooling being on by default. The following are the adjustment categories and the tests that fall under each category. Change in assertions: PlannerTest#testAcidTableScans PlannerTest#testBloomFilterAssignment PlannerTest#testConstantFolding PlannerTest#testFkPkJoinDetection PlannerTest#testFkPkJoinDetectionWithHDFSNumRowsEstDisabled PlannerTest#testKuduSelectivity PlannerTest#testMaxRowSize PlannerTest#testMinMaxRuntimeFilters PlannerTest#testMinMaxRuntimeFiltersWithHDFSNumRowsEstDisabled PlannerTest#testMtDopValidation PlannerTest#testParquetFiltering PlannerTest#testParquetFilteringDisabled PlannerTest#testPartitionPruning PlannerTest#testPreaggBytesLimit PlannerTest#testResourceRequirements PlannerTest#testRuntimeFilterQueryOptions PlannerTest#testSortExprMaterialization PlannerTest#testSpillableBufferSizing PlannerTest#testTableSample PlannerTest#testTpch PlannerTest#testKuduTpch PlannerTest#testTpchNested PlannerTest#testUnion TpcdsPlannerTest custom_cluster/test_admission_controller.py::TestAdmissionController::test_dedicated_coordinator_planner_estimates custom_cluster/test_admission_controller.py::TestAdmissionController::test_memory_rejection custom_cluster/test_admission_controller.py::TestAdmissionController::test_pool_mem_limit_configs metadata/test_explain.py::TestExplain::test_explain_level2 metadata/test_explain.py::TestExplain::test_explain_level3 metadata/test_stats_extrapolation.py::TestStatsExtrapolation::test_stats_extrapolation Increase BUFFER_POOL_LIMIT: query_test/test_queries.py::TestQueries::test_analytic_fns query_test/test_runtime_filters.py::TestRuntimeRowFilters::test_row_filter_reservation query_test/test_sort.py::TestQueryFullSort::test_multiple_mem_limits_full_output 
query_test/test_spilling.py::TestSpillingBroadcastJoins::test_spilling_broadcast_joins query_test/test_spilling.py::TestSpillingDebugActionDimensions::test_spilling_aggs query_test/test_spilling.py::TestSpillingDebugActionDimensions::test_spilling_regression_exhaustive query_test/test_udfs.py::TestUdfExecution::test_mem_limits Increase MEM_LIMIT: query_test/test_mem_usage_scaling.py::TestExchangeMemUsage::test_exchange_mem_usage_scaling query_test/test_mem_usage_scaling.py::TestScanMemLimit::test_hdfs_scanner_thread_mem_scaling Increase MAX_ROW_SIZE: custom_cluster/test_parquet_max_page_header.py::TestParquetMaxPageHeader::test_large_page_header_config query_test/test_insert.py::TestInsertQueries::test_insert_large_string query_test/test_query_mem_limit.py::TestQueryMemLimit::test_mem_limit query_test/test_scanners.py::TestTextSplitDelimiters::test_text_split_across_buffers_delimiter query_test/test_scanners.py::TestWideRow::test_wide_row Disable result spooling to maintain assertion: custom_cluster/test_admission_controller.py::TestAdmissionController::test_set_request_pool custom_cluster/test_admission_controller.py::TestAdmissionController::test_timeout_reason_host_memory custom_cluster/test_admission_controller.py::TestAdmissionController::test_timeout_reason_pool_memory custom_cluster/test_admission_controller.py::TestAdmissionController::test_queue_reasons_memory custom_cluster/test_admission_controller.py::TestAdmissionController::test_pool_config_change_while_queued custom_cluster/test_query_retries.py::TestQueryRetries::test_retry_fetched_rows custom_cluster/test_query_retries.py::TestQueryRetries::test_retry_finished_query custom_cluster/test_scratch_disk.py::TestScratchDir::test_no_dirs custom_cluster/test_scratch_disk.py::TestScratchDir::test_non_existing_dirs custom_cluster/test_scratch_disk.py::TestScratchDir::test_non_writable_dirs query_test/test_insert.py::TestInsertQueries::test_insert_large_string (the last query only) 
query_test/test_kudu.py::TestKuduMemLimits::test_low_mem_limit_low_selectivity_scan query_test/test_mem_usage_scaling.py::TestScanMemLimit::test_kudu_scan_mem_usage query_test/test_queries.py::TestQueriesParquetTables::test_very_large_strings query_test/test_query_mem_limit.py::TestCodegenMemLimit::test_codegen_mem_limit shell/test_shell_client.py::TestShellClient::test_fetch_size Testing: - Pass exhaustive tests. Change-Id: I9e360c1428676d8f3fab5d95efee18aca085eba4 Reviewed-on: http://gerrit.cloudera.org:8080/16755 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
199 lines
4.8 KiB
Plaintext
199 lines
4.8 KiB
Plaintext
====
---- QUERY
# Test count(*) grouped by a BIGINT column. The buffer pool limit is chosen so that
# the aggregation is expected to spill (checked in RUNTIME_PROFILE below).
set buffer_pool_limit=30m;
select l_orderkey, count(*)
from lineitem
group by 1
order by 1 limit 10
---- RESULTS
1,6
2,1
3,6
4,1
5,3
6,1
7,7
32,6
33,4
34,3
---- TYPES
BIGINT, BIGINT
---- RUNTIME_PROFILE
# Verify that spilling and passthrough were activated.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Test query with string grouping column and string agg columns
# num_nodes=1 runs the query on a single node.
set buffer_pool_limit=82m;
set num_nodes=1;
select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode)
from lineitem
group by 1,2
order by 1,2 limit 3
---- RESULTS
'A',3,0.050000,'RAIL'
'A',5,0.030000,'AIR'
'A',6,0.030000,'TRUCK'
---- TYPES
STRING, BIGINT, DECIMAL, STRING
---- RUNTIME_PROFILE
# Verify that at least one partition was spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Test query with string grouping column
# num_nodes=1 runs the query on a single node.
set buffer_pool_limit=34m;
set num_nodes=1;
select l_comment, count(*)
from lineitem
group by 1
order by count(*) desc limit 5
---- RESULTS
' furiously',943
' carefully',893
' carefully ',875
'carefully ',854
' furiously ',845
---- TYPES
STRING, BIGINT
---- RUNTIME_PROFILE
# Verify that at least one partition was spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Test query with string grouping column and string agg columns. avg() is wrapped in
# round(..., 2), so the expected averages are compared at two decimal places.
# num_nodes=1 runs the query on a single node.
set buffer_pool_limit=82m;
set num_nodes=1;
select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode)
from lineitem
group by 1,2
order by 1,2 limit 3;
---- RESULTS
'A',3,0.05,'RAIL'
'A',5,0.03,'AIR'
'A',6,0.03,'TRUCK'
---- TYPES
STRING, BIGINT, DECIMAL, STRING
---- RUNTIME_PROFILE
# Verify that spilling happened in the agg.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Test with non-scalar intermediate state (avg() uses fixed intermediate value).
set buffer_pool_limit=34m;
select l_orderkey, avg(l_orderkey)
from lineitem
group by 1
order by 1 limit 5
---- RESULTS
1,1
2,2
3,3
4,4
5,5
---- TYPES
BIGINT, DOUBLE
---- RUNTIME_PROFILE
# Verify that passthrough and spilling happened in the pre and merge agg.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
====
---- QUERY
# Test aggregation spill with group_concat distinct
set buffer_pool_limit=50m;
select l_orderkey, count(*), group_concat(distinct l_linestatus, '|')
from lineitem
group by 1
order by 1 limit 10
---- RESULTS
1,6,'O'
2,1,'O'
3,6,'F'
4,1,'O'
5,3,'F'
6,1,'F'
7,7,'O'
32,6,'O'
33,4,'F'
34,3,'O'
---- TYPES
BIGINT, BIGINT, STRING
---- RUNTIME_PROFILE
# Verify that at least one of the aggs spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Test aggregation with minimum required reservation to exercise IMPALA-2708.
# Merge aggregation requires 17 buffers and preaggregation requires 16 buffers
# plus 1mb of hash tables. The buffer size is 256k for this test. The scan needs 24MB.
# NOTE(review): the figures above add up to 33.25MB (4.25MB + 4MB + 1MB + 24MB); the
# remaining 0.5MB in the limit below presumably covers buffers reserved for result
# spooling - confirm and itemize it here.
set max_row_size=256k;
set buffer_pool_limit="33.75M";
select count(*)
from (select distinct * from orders) t
---- TYPES
BIGINT
---- RESULTS
1500000
---- RUNTIME_PROFILE
# Verify that at least one partition was spilled.
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Multiple distinct aggregates over different columns, without a group by
set buffer_pool_limit=30M;
select count(distinct l_orderkey), count(distinct l_partkey) from lineitem
---- TYPES
BIGINT,BIGINT
---- RESULTS
1500000,200000
---- RUNTIME_PROFILE
# Verify that passthrough and spilling were both activated.
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Multiple distinct with string col and group by
set buffer_pool_limit=50m;
select l_linenumber, count(distinct l_orderkey), count(distinct l_comment) from lineitem
group by 1 order by 1 limit 5
---- TYPES
INT,BIGINT,BIGINT
---- RESULTS
1,1500000,1273334
2,1285828,1102714
3,1071394,929553
4,857015,753374
5,643287,574337
---- RUNTIME_PROFILE
# Verify that passthrough and spilling were both activated.
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Multiple distinct and non-distinct, with an intermediate tuple (avg)
# Reads tpch_parquet.lineitem explicitly rather than the unqualified lineitem table.
set buffer_pool_limit=40m;
select avg(distinct l_orderkey), count(distinct l_partkey), sum(l_tax), count(l_suppkey)
from tpch_parquet.lineitem
---- TYPES
DOUBLE,BIGINT,DECIMAL,BIGINT
---- RESULTS
2999991.5,200000,240129.67,6001215
---- RUNTIME_PROFILE
# Verify that passthrough and spilling were both activated.
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====
---- QUERY
# Multiple distinct and non-distinct, with a group by
# Reads tpch_parquet.lineitem explicitly rather than the unqualified lineitem table.
set buffer_pool_limit=55m;
select l_linenumber, avg(distinct l_orderkey), count(distinct l_partkey), sum(l_tax), count(l_suppkey)
from tpch_parquet.lineitem
group by 1 order by 1 limit 5
---- TYPES
INT,DOUBLE,BIGINT,DECIMAL,BIGINT
---- RESULTS
1,2999991.5,199893,60025.25,1500000
2,3000615.766574534,199674,51457.37,1285828
3,3000079.631604246,199036,42879.38,1071394
4,3000330.547357981,197222,34279.36,857015
5,2999188.900650876,191905,25745.25,643287
---- RUNTIME_PROFILE
# Verify that passthrough and spilling were both activated.
row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
====