Files
impala/tests/query_test/test_query_mem_limit.py
Riza Suminto 49ac55fb69 IMPALA-9856: Enable result spooling by default.
Result spooling has been relatively stable since it was introduced, and
it has several benefits described in IMPALA-8656. This patch enables the
result spooling (SPOOL_QUERY_RESULTS) query option by default.

Furthermore, some tests need to be adjusted to account for result
spooling being enabled by default. The adjustment categories, and the
tests that fall under each category, are listed below.

Change in assertions:
PlannerTest#testAcidTableScans
PlannerTest#testBloomFilterAssignment
PlannerTest#testConstantFolding
PlannerTest#testFkPkJoinDetection
PlannerTest#testFkPkJoinDetectionWithHDFSNumRowsEstDisabled
PlannerTest#testKuduSelectivity
PlannerTest#testMaxRowSize
PlannerTest#testMinMaxRuntimeFilters
PlannerTest#testMinMaxRuntimeFiltersWithHDFSNumRowsEstDisabled
PlannerTest#testMtDopValidation
PlannerTest#testParquetFiltering
PlannerTest#testParquetFilteringDisabled
PlannerTest#testPartitionPruning
PlannerTest#testPreaggBytesLimit
PlannerTest#testResourceRequirements
PlannerTest#testRuntimeFilterQueryOptions
PlannerTest#testSortExprMaterialization
PlannerTest#testSpillableBufferSizing
PlannerTest#testTableSample
PlannerTest#testTpch
PlannerTest#testKuduTpch
PlannerTest#testTpchNested
PlannerTest#testUnion
TpcdsPlannerTest
custom_cluster/test_admission_controller.py::TestAdmissionController::test_dedicated_coordinator_planner_estimates
custom_cluster/test_admission_controller.py::TestAdmissionController::test_memory_rejection
custom_cluster/test_admission_controller.py::TestAdmissionController::test_pool_mem_limit_configs
metadata/test_explain.py::TestExplain::test_explain_level2
metadata/test_explain.py::TestExplain::test_explain_level3
metadata/test_stats_extrapolation.py::TestStatsExtrapolation::test_stats_extrapolation

Increase BUFFER_POOL_LIMIT:
query_test/test_queries.py::TestQueries::test_analytic_fns
query_test/test_runtime_filters.py::TestRuntimeRowFilters::test_row_filter_reservation
query_test/test_sort.py::TestQueryFullSort::test_multiple_mem_limits_full_output
query_test/test_spilling.py::TestSpillingBroadcastJoins::test_spilling_broadcast_joins
query_test/test_spilling.py::TestSpillingDebugActionDimensions::test_spilling_aggs
query_test/test_spilling.py::TestSpillingDebugActionDimensions::test_spilling_regression_exhaustive
query_test/test_udfs.py::TestUdfExecution::test_mem_limits

Increase MEM_LIMIT:
query_test/test_mem_usage_scaling.py::TestExchangeMemUsage::test_exchange_mem_usage_scaling
query_test/test_mem_usage_scaling.py::TestScanMemLimit::test_hdfs_scanner_thread_mem_scaling

Increase MAX_ROW_SIZE:
custom_cluster/test_parquet_max_page_header.py::TestParquetMaxPageHeader::test_large_page_header_config
query_test/test_insert.py::TestInsertQueries::test_insert_large_string
query_test/test_query_mem_limit.py::TestQueryMemLimit::test_mem_limit
query_test/test_scanners.py::TestTextSplitDelimiters::test_text_split_across_buffers_delimiter
query_test/test_scanners.py::TestWideRow::test_wide_row

Disable result spooling to maintain assertion:
custom_cluster/test_admission_controller.py::TestAdmissionController::test_set_request_pool
custom_cluster/test_admission_controller.py::TestAdmissionController::test_timeout_reason_host_memory
custom_cluster/test_admission_controller.py::TestAdmissionController::test_timeout_reason_pool_memory
custom_cluster/test_admission_controller.py::TestAdmissionController::test_queue_reasons_memory
custom_cluster/test_admission_controller.py::TestAdmissionController::test_pool_config_change_while_queued
custom_cluster/test_query_retries.py::TestQueryRetries::test_retry_fetched_rows
custom_cluster/test_query_retries.py::TestQueryRetries::test_retry_finished_query
custom_cluster/test_scratch_disk.py::TestScratchDir::test_no_dirs
custom_cluster/test_scratch_disk.py::TestScratchDir::test_non_existing_dirs
custom_cluster/test_scratch_disk.py::TestScratchDir::test_non_writable_dirs
query_test/test_insert.py::TestInsertQueries::test_insert_large_string (the last query only)
query_test/test_kudu.py::TestKuduMemLimits::test_low_mem_limit_low_selectivity_scan
query_test/test_mem_usage_scaling.py::TestScanMemLimit::test_kudu_scan_mem_usage
query_test/test_queries.py::TestQueriesParquetTables::test_very_large_strings
query_test/test_query_mem_limit.py::TestCodegenMemLimit::test_codegen_mem_limit
shell/test_shell_client.py::TestShellClient::test_fetch_size

Testing:
- Pass exhaustive tests.

Change-Id: I9e360c1428676d8f3fab5d95efee18aca085eba4
Reviewed-on: http://gerrit.cloudera.org:8080/16755
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2021-03-02 04:58:51 +00:00

138 lines
5.7 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Targeted tests to validate per-query memory limit.
import pytest
import re
import sys
from copy import copy
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfEC
from tests.common.test_dimensions import (
ImpalaTestDimension,
create_single_exec_option_dimension,
create_uncompressed_text_dimension)
class TestQueryMemLimit(ImpalaTestSuite):
  """Functional validation of per-query memory limits.

  A specific query is run on text files, with the memory limit added as an
  extra test dimension. The query is expected to pass or fail depending on
  the limit value.
  """
  # There are a lot of 'unique' comments in lineitem.
  # Almost 80% of the table size.
  QUERIES = ["select count(distinct l_comment) from lineitem",
             "select group_concat(l_linestatus) from lineitem"]

  # TODO: It will be nice if we can get how much memory a query uses
  # dynamically, even if it is a rough approximation.
  # A mem_limit is expressed in bytes, with values <= 0 signifying no cap.
  # These values are either really small, unlimited, or have a really large cap.
  #
  # sys.maxsize and floor division are used (instead of the Python-2-only
  # sys.maxint and '/') so that the constants are integral digit strings on
  # both Python 2 and Python 3.
  MAXINT_BYTES = str(sys.maxsize)
  MAXINT_MB = str(sys.maxsize // (1024 * 1024))
  MAXINT_GB = str(sys.maxsize // (1024 * 1024 * 1024))

  # We expect the tests with MAXINT_* using valid units [bmg] to succeed.
  PASS_REGEX = re.compile(
      "(%s|%s|%s)[bmg]?$" % (MAXINT_BYTES, MAXINT_MB, MAXINT_GB), re.I)

  MEM_LIMITS = ["-1", "0", "1", "10", "100", "1000", "10000", MAXINT_BYTES,
                MAXINT_BYTES + "b", MAXINT_BYTES + "B",
                MAXINT_MB + "m", MAXINT_MB + "M",
                MAXINT_GB + "g", MAXINT_GB + "G",
                # invalid per-query memory limits
                "-1234", "-3.14", "xyz", "100%", MAXINT_BYTES + "k",
                "k" + MAXINT_BYTES]

  # Reduced set of limits used with the 'core' exploration strategy.
  MEM_LIMITS_CORE = ["-1", "0", "10000", MAXINT_BYTES,
                     MAXINT_BYTES + "b", MAXINT_MB + "M", MAXINT_GB + "g"]

  @classmethod
  def get_workload(cls):
    """Return the name of the workload these tests run against."""
    return 'tpch'

  @classmethod
  def add_test_dimensions(cls):
    """Register the table format, mem_limit and query test dimensions."""
    super(TestQueryMemLimit, cls).add_test_dimensions()
    # Only run the query for text
    cls.ImpalaTestMatrix.add_dimension(
        create_uncompressed_text_dimension(cls.get_workload()))

    # add mem_limit as a test dimension.
    if cls.exploration_strategy() == 'core':
      mem_limits = TestQueryMemLimit.MEM_LIMITS_CORE
    else:
      mem_limits = TestQueryMemLimit.MEM_LIMITS
    cls.ImpalaTestMatrix.add_dimension(
        ImpalaTestDimension('mem_limit', *mem_limits))

    # Make query a test dimension so we can support more queries.
    cls.ImpalaTestMatrix.add_dimension(
        ImpalaTestDimension('query', *TestQueryMemLimit.QUERIES))

    # This query takes a very long time to finish with a bound on the batch_size.
    # Remove the bound on the batch size.
    cls.ImpalaTestMatrix.add_constraint(
        lambda v: v.get_value('exec_option')['batch_size'] == 0)

  @SkipIfEC.oom
  @pytest.mark.execute_serially
  def test_mem_limit(self, vector):
    """Run the vector's query under the vector's mem_limit and check the outcome.

    The query is expected to succeed when the limit is "0" or "-1" (no cap) or
    matches PASS_REGEX (a MAXINT_* value with an optional valid unit), and to
    fail for every other limit.
    """
    mem_limit = copy(vector.get_value('mem_limit'))
    exec_options = copy(vector.get_value('exec_option'))
    exec_options['mem_limit'] = mem_limit
    # Send to the no-limits pool so that no memory limits apply.
    exec_options['request_pool'] = "root.no-limits"
    # IMPALA-9856: For the group_concat query, this test expects a resulting row
    # of up to 17.17 MB in size. Therefore, we explicitly set an 18 MB
    # MAX_ROW_SIZE here so that it can fit in BufferedPlanRootSink.
    exec_options['max_row_size'] = '18M'
    query = vector.get_value('query')
    table_format = vector.get_value('table_format')

    should_succeed = (
        mem_limit in ("0", "-1") or self.PASS_REGEX.match(mem_limit) is not None)
    self.__exec_query(query, exec_options, should_succeed, table_format)

  def __exec_query(self, query, exec_options, should_succeed, table_format):
    """Execute 'query' and assert that it succeeds or fails as expected.

    An ImpalaBeeswaxException raised by execute_query() counts as a query
    failure; any other exception propagates unchanged.
    """
    try:
      self.execute_query(query, exec_options, table_format=table_format)
    except ImpalaBeeswaxException as e:
      assert not should_succeed, "Query should not have failed: %s" % e
    else:
      # Only reached when the query completed without raising.
      assert should_succeed, "Query was expected to fail"
class TestCodegenMemLimit(ImpalaTestSuite):
  """Checks that the memory limit is also applied to codegen."""

  @classmethod
  def get_workload(cls):
    """Return the name of the workload these tests run against."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Pin the exec options to a single node and restrict runs to parquet."""
    super(TestCodegenMemLimit, cls).add_test_dimensions()
    matrix = cls.ImpalaTestMatrix

    # num_nodes=1 avoids races between fragments allocating memory.
    single_node_options = create_single_exec_option_dimension(
        num_nodes=1, disable_codegen_rows_threshold=0)
    matrix.add_dimension(single_node_options)

    # Parquet is the only file format this query is run for.
    matrix.add_constraint(
        lambda vec: vec.get_value('table_format').file_format == 'parquet')

  def test_codegen_mem_limit(self, vector):
    """Run the 'QueryTest/codegen-mem-limit' test case with the given vector."""
    self.run_test_case('QueryTest/codegen-mem-limit', vector)