mirror of
https://github.com/apache/impala.git
synced 2026-01-22 18:02:34 -05:00
IMPALA-9856 enables result spooling by default. Result spooling depends on the ability to spill its entire BufferedTupleStream to disk once it hits its maximum memory reservation. However, if the query option scratch_limit is set lower than max_spilled_result_spooling_mem, the query might fail in the middle of execution due to insufficient scratch space. This patch adds a planner change to consider the scratch_limit and scratch_dirs query options when computing the resources used by result spooling. The algorithm is as follows: * If scratch_dirs is empty or scratch_limit < minMemReservationBytes required to use BufferedPlanRootSink, we set spool_query_results to false and fall back to using BlockingPlanRootSink. * If scratch_limit > minMemReservationBytes but still fairly low, we lower max_result_spooling_mem (default is 100MB) and max_spilled_result_spooling_mem (default is 1GB) to fit scratch_limit. * If scratch_limit > max_spilled_result_spooling_mem, do nothing. Testing: - Add TestScratchLimit::test_result_spooling_and_varying_scratch_limit - Verify that spool_query_results query option is disabled in TestScratchDir::test_no_dirs - Pass exhaustive tests. Change-Id: I541f46e6911694e14c0fc25be1a6982fd929d3a9 Reviewed-on: http://gerrit.cloudera.org:8080/17166 Reviewed-by: Aman Sinha <amsinha@cloudera.com> Tested-by: Aman Sinha <amsinha@cloudera.com>
44 lines
2.0 KiB
Plaintext
44 lines
2.0 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Unbounded scratch_limit will not override default query options of result spooling.
# Baseline case: the resource line below is expected to keep a 4.00MB reservation and
# a 2.00MB spill buffer (presumably the PLAN-ROOT SINK line of the plan -- confirm).
# NOTE(review): the first row_regex only requires the "set by configuration and
# planner" line to exist; it does not assert that no spooling option was overridden.
|
|
set scratch_limit=-1;
|
|
select o_orderdate, o_custkey, o_comment from tpch.orders limit 100000;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*set by configuration and planner.*
|
|
row_regex: \| mem-estimate=8.63MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
|
|
====
|
|
---- QUERY
|
|
# Result spooling should be disabled if scratch_limit is 0.
# Expected profile: the planner-set options include SPOOL_QUERY_RESULTS=0, and the
# resource line reports 0B estimate/reservation with no spill-buffer entry at all.
|
|
set scratch_limit=0;
|
|
select o_orderdate, o_custkey, o_comment from tpch.orders limit 100000;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*set by configuration and planner.*SPOOL_QUERY_RESULTS=0
|
|
row_regex: \| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
|
====
|
|
---- QUERY
|
|
# Result spooling should be disabled if scratch_limit is less than minimum memory
|
|
# reservation required by result spooling (4MB).
# Here scratch_limit is 2MB, so the expected profile is the same as for
# scratch_limit=0: SPOOL_QUERY_RESULTS=0 and a 0B resource line without spill-buffer.
|
|
set scratch_limit=2m;
|
|
select o_orderdate, o_custkey, o_comment from tpch.orders limit 100000;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*set by configuration and planner.*SPOOL_QUERY_RESULTS=0
|
|
row_regex: \| mem-estimate=0B mem-reservation=0B thread-reservation=0
|
|
====
|
|
---- QUERY
|
|
# scratch_limit higher than minimum memory required by result spooling, but less than the
|
|
# default MAX_RESULT_SPOOLING_MEM (100MB).
# Spooling stays enabled, but both MAX_RESULT_SPOOLING_MEM and
# MAX_SPILLED_RESULT_SPOOLING_MEM are expected to be lowered to 5242880 bytes (5MB),
# i.e. 2MB below the 7MB scratch_limit (presumably the 2.00MB spill buffer is held
# back -- confirm against the planner). The memory estimate drops to 5.00MB to match.
|
|
set scratch_limit=7m;
|
|
select o_orderdate, o_custkey, o_comment from tpch.orders limit 100000;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*set by configuration and planner.*MAX_RESULT_SPOOLING_MEM=5242880,MAX_SPILLED_RESULT_SPOOLING_MEM=5242880
|
|
row_regex: \| mem-estimate=5.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
|
|
====
|
|
---- QUERY
|
|
# scratch_limit higher than minimum memory required by result spooling, but less than the
|
|
# default MAX_SPILLED_RESULT_SPOOLING_MEM (1GB).
# MAX_SPILLED_RESULT_SPOOLING_MEM is expected to be lowered to 207618048 bytes (198MB),
# i.e. 2MB below the 200MB scratch_limit, while the resource line stays identical to
# the unbounded case (8.63MB estimate).
# NOTE(review): the regex does not pin MAX_RESULT_SPOOLING_MEM, so this case does not
# verify whether that option is left at its default.
|
|
set scratch_limit=200m;
|
|
select o_orderdate, o_custkey, o_comment from tpch.orders limit 100000;
|
|
---- RUNTIME_PROFILE
|
|
row_regex: .*set by configuration and planner.*MAX_SPILLED_RESULT_SPOOLING_MEM=207618048
|
|
row_regex: \| mem-estimate=8.63MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
|
|
==== |