Mirror of https://github.com/apache/impala.git (synced 2026-02-03 00:00:40 -05:00)
Result spooling has been relatively stable since it was introduced, and it has
several benefits described in IMPALA-8656. This patch enables the result
spooling (SPOOL_QUERY_RESULTS) query option by default.

Furthermore, some tests need to be adjusted to account for result spooling
being on by default. The following are the adjustment categories and the lists
of tests that fall under each category.

Change in assertions:
PlannerTest#testAcidTableScans
PlannerTest#testBloomFilterAssignment
PlannerTest#testConstantFolding
PlannerTest#testFkPkJoinDetection
PlannerTest#testFkPkJoinDetectionWithHDFSNumRowsEstDisabled
PlannerTest#testKuduSelectivity
PlannerTest#testMaxRowSize
PlannerTest#testMinMaxRuntimeFilters
PlannerTest#testMinMaxRuntimeFiltersWithHDFSNumRowsEstDisabled
PlannerTest#testMtDopValidation
PlannerTest#testParquetFiltering
PlannerTest#testParquetFilteringDisabled
PlannerTest#testPartitionPruning
PlannerTest#testPreaggBytesLimit
PlannerTest#testResourceRequirements
PlannerTest#testRuntimeFilterQueryOptions
PlannerTest#testSortExprMaterialization
PlannerTest#testSpillableBufferSizing
PlannerTest#testTableSample
PlannerTest#testTpch
PlannerTest#testKuduTpch
PlannerTest#testTpchNested
PlannerTest#testUnion
TpcdsPlannerTest
custom_cluster/test_admission_controller.py::TestAdmissionController::test_dedicated_coordinator_planner_estimates
custom_cluster/test_admission_controller.py::TestAdmissionController::test_memory_rejection
custom_cluster/test_admission_controller.py::TestAdmissionController::test_pool_mem_limit_configs
metadata/test_explain.py::TestExplain::test_explain_level2
metadata/test_explain.py::TestExplain::test_explain_level3
metadata/test_stats_extrapolation.py::TestStatsExtrapolation::test_stats_extrapolation

Increase BUFFER_POOL_LIMIT:
query_test/test_queries.py::TestQueries::test_analytic_fns
query_test/test_runtime_filters.py::TestRuntimeRowFilters::test_row_filter_reservation
query_test/test_sort.py::TestQueryFullSort::test_multiple_mem_limits_full_output
query_test/test_spilling.py::TestSpillingBroadcastJoins::test_spilling_broadcast_joins
query_test/test_spilling.py::TestSpillingDebugActionDimensions::test_spilling_aggs
query_test/test_spilling.py::TestSpillingDebugActionDimensions::test_spilling_regression_exhaustive
query_test/test_udfs.py::TestUdfExecution::test_mem_limits

Increase MEM_LIMIT:
query_test/test_mem_usage_scaling.py::TestExchangeMemUsage::test_exchange_mem_usage_scaling
query_test/test_mem_usage_scaling.py::TestScanMemLimit::test_hdfs_scanner_thread_mem_scaling

Increase MAX_ROW_SIZE:
custom_cluster/test_parquet_max_page_header.py::TestParquetMaxPageHeader::test_large_page_header_config
query_test/test_insert.py::TestInsertQueries::test_insert_large_string
query_test/test_query_mem_limit.py::TestQueryMemLimit::test_mem_limit
query_test/test_scanners.py::TestTextSplitDelimiters::test_text_split_across_buffers_delimiter
query_test/test_scanners.py::TestWideRow::test_wide_row

Disable result spooling to maintain assertion:
custom_cluster/test_admission_controller.py::TestAdmissionController::test_set_request_pool
custom_cluster/test_admission_controller.py::TestAdmissionController::test_timeout_reason_host_memory
custom_cluster/test_admission_controller.py::TestAdmissionController::test_timeout_reason_pool_memory
custom_cluster/test_admission_controller.py::TestAdmissionController::test_queue_reasons_memory
custom_cluster/test_admission_controller.py::TestAdmissionController::test_pool_config_change_while_queued
custom_cluster/test_query_retries.py::TestQueryRetries::test_retry_fetched_rows
custom_cluster/test_query_retries.py::TestQueryRetries::test_retry_finished_query
custom_cluster/test_scratch_disk.py::TestScratchDir::test_no_dirs
custom_cluster/test_scratch_disk.py::TestScratchDir::test_non_existing_dirs
custom_cluster/test_scratch_disk.py::TestScratchDir::test_non_writable_dirs
query_test/test_insert.py::TestInsertQueries::test_insert_large_string (the last query only)
query_test/test_kudu.py::TestKuduMemLimits::test_low_mem_limit_low_selectivity_scan
query_test/test_mem_usage_scaling.py::TestScanMemLimit::test_kudu_scan_mem_usage
query_test/test_queries.py::TestQueriesParquetTables::test_very_large_strings
query_test/test_query_mem_limit.py::TestCodegenMemLimit::test_codegen_mem_limit
shell/test_shell_client.py::TestShellClient::test_fetch_size

Testing:
- Pass exhaustive tests.

Change-Id: I9e360c1428676d8f3fab5d95efee18aca085eba4
Reviewed-on: http://gerrit.cloudera.org:8080/16755
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
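Most of the adjustments above amount to setting a query option before the affected
statement runs. As a rough, illustrative sketch only (the option values and table name
below are hypothetical, not taken from any of the listed tests), a test built on
ImpalaTestSuite can apply such an adjustment through its client, the same way
test_insert_large_string in this file does:

    # Illustrative sketch only: option values and table name are made up.
    self.client.set_configuration_option("buffer_pool_limit", "256m")  # Increase BUFFER_POOL_LIMIT
    self.client.set_configuration_option("max_row_size", "260m")       # Increase MAX_ROW_SIZE
    self.client.set_configuration_option("spool_query_results", "0")   # Disable result spooling
    result = self.client.execute("select * from some_table")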
432 lines · 20 KiB · Python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Targeted Impala insert tests

import os
import pytest
import re

from testdata.common import widetable
from tests.common.impala_cluster import ImpalaCluster
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.parametrize import UniqueDatabase
from tests.common.skip import SkipIfABFS, SkipIfEC, SkipIfLocal, \
  SkipIfHive2, SkipIfNotHdfsMinicluster, SkipIfS3, SkipIfDockerizedCluster
from tests.common.test_dimensions import (
  create_exec_option_dimension,
  create_uncompressed_text_dimension)
from tests.common.test_result_verifier import (
  QueryTestResult,
  parse_result_rows)
from tests.common.test_vector import ImpalaTestDimension
from tests.verifiers.metric_verifier import MetricVerifier

PARQUET_CODECS = ['none', 'snappy', 'gzip', 'zstd', 'lz4']


class TestInsertQueries(ImpalaTestSuite):
  @classmethod
  def get_workload(self):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestInsertQueries, cls).add_test_dimensions()
    # Fix the exec_option vector to have a single value. This is needed should we decide
    # to run the insert tests in parallel (otherwise there will be two tests inserting
    # into the same table at the same time for the same file format).
    # TODO: When we do decide to run these tests in parallel we could create unique temp
    # tables for each test case to resolve the concurrency problems.
    if cls.exploration_strategy() == 'core':
      cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
          cluster_sizes=[0], disable_codegen_options=[True, False], batch_sizes=[0],
          sync_ddl=[0]))
      cls.ImpalaTestMatrix.add_dimension(
          create_uncompressed_text_dimension(cls.get_workload()))
    else:
      cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
          cluster_sizes=[0], disable_codegen_options=[True, False], batch_sizes=[0, 1, 16],
          sync_ddl=[0, 1]))
      cls.ImpalaTestMatrix.add_dimension(
          ImpalaTestDimension("compression_codec", *PARQUET_CODECS));
      # Insert is currently only supported for text and parquet
      # For parquet, we want to iterate through all the compression codecs
      # TODO: each column in parquet can have a different codec. We could
      # test all the codecs in one table/file with some additional flags.
      cls.ImpalaTestMatrix.add_constraint(lambda v:\
          v.get_value('table_format').file_format == 'parquet' or \
          (v.get_value('table_format').file_format == 'text' and \
           v.get_value('compression_codec') == 'none'))
      cls.ImpalaTestMatrix.add_constraint(lambda v:\
          v.get_value('table_format').compression_codec == 'none')
      # Only test other batch sizes for uncompressed parquet to keep the execution time
      # within reasonable bounds.
      cls.ImpalaTestMatrix.add_constraint(lambda v:\
          v.get_value('exec_option')['batch_size'] == 0 or \
          (v.get_value('table_format').file_format == 'parquet' and \
           v.get_value('compression_codec') == 'none'))

  @pytest.mark.execute_serially
  def test_insert_large_string(self, vector, unique_database):
    """Test handling of large strings in inserter and scanner."""
    if "-Xcheck:jni" in os.environ.get("LIBHDFS_OPTS", ""):
      pytest.skip("Test unreasonably slow with JNI checking.")
    table_name = unique_database + ".insert_largestring"

    self.client.set_configuration_option("mem_limit", "4gb")
    self.client.set_configuration_option("max_row_size", "257mb")
    file_format = vector.get_value('table_format').file_format
    if file_format == "parquet":
      stored_as = file_format
    else:
      assert file_format == "text"
      stored_as = "textfile"
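    # The CTAS below materializes a single row with a 256 MB string
    # (repeat('AZ', 128M) is 2 * 128M bytes), which is why max_row_size was
    # raised to 257mb above.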
    self.client.execute("""
        create table {0}
        stored as {1} as
        select repeat('AZ', 128 * 1024 * 1024) as s""".format(table_name, stored_as))

    # Make sure it produces correct result when materializing no tuples.
    result = self.client.execute("select count(*) from {0}".format(table_name))
    assert result.data == ["1"]

    # Make sure it got the length right.
    result = self.client.execute("select length(s) from {0}".format(table_name))
    assert result.data == [str(2 * 128 * 1024 * 1024)]

    # Spot-check the data.
    result = self.client.execute(
        "select substr(s, 200 * 1024 * 1024, 5) from {0}".format(table_name))
    assert result.data == ["ZAZAZ"]

    # IMPALA-7648: test that we gracefully fail when there is not enough memory
    # to fit the scanned string in memory.
    # IMPALA-9856: Disable result spooling for this query since it is intended to test
    # for OOM.
    self.client.set_configuration_option("spool_query_results", "0")
    self.client.set_configuration_option("mem_limit", "50M")
    try:
      self.client.execute("select s from {0}".format(table_name))
      assert False, "Expected query to fail"
    except Exception as e:
      assert "Memory limit exceeded" in str(e)

  @classmethod
  def setup_class(cls):
    super(TestInsertQueries, cls).setup_class()

  @UniqueDatabase.parametrize(sync_ddl=True)
  # Erasure coding doesn't respect memory limit
  @SkipIfEC.fix_later
  # ABFS partition names cannot end in periods
  @SkipIfABFS.file_or_folder_name_ends_with_period
  def test_insert(self, vector, unique_database):
    if (vector.get_value('table_format').file_format == 'parquet'):
      vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
          vector.get_value('compression_codec')
    self.run_test_case('QueryTest/insert', vector, unique_database,
        multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1,
        test_file_vars={'$ORIGINAL_DB': ImpalaTestSuite
        .get_db_name_from_format(vector.get_value('table_format'))})

  @SkipIfHive2.acid
  @UniqueDatabase.parametrize(sync_ddl=True)
  def test_acid_insert(self, vector, unique_database):
    exec_options = vector.get_value('exec_option')
    file_format = vector.get_value('table_format').file_format
    if (file_format == 'parquet'):
      exec_options['COMPRESSION_CODEC'] = vector.get_value('compression_codec')
    exec_options['DEFAULT_FILE_FORMAT'] = file_format
    self.run_test_case('QueryTest/acid-insert', vector, unique_database,
        multiple_impalad=exec_options['sync_ddl'] == 1)

  @SkipIfHive2.acid
  @UniqueDatabase.parametrize(sync_ddl=True)
  def test_acid_nonacid_insert(self, vector, unique_database):
    self.run_test_case('QueryTest/acid-nonacid-insert', vector, unique_database,
        multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1)

  @SkipIfHive2.acid
  @UniqueDatabase.parametrize(sync_ddl=True)
  def test_acid_insert_fail(self, vector, unique_database):
    self.run_test_case('QueryTest/acid-insert-fail', vector, unique_database,
        multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1)

  @UniqueDatabase.parametrize(sync_ddl=True)
  @pytest.mark.execute_serially
  @SkipIfNotHdfsMinicluster.tuned_for_minicluster
  def test_insert_mem_limit(self, vector, unique_database):
    if (vector.get_value('table_format').file_format == 'parquet'):
      vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
          vector.get_value('compression_codec')
    self.run_test_case('QueryTest/insert-mem-limit', vector, unique_database,
        multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1,
        test_file_vars={'$ORIGINAL_DB': ImpalaTestSuite
        .get_db_name_from_format(vector.get_value('table_format'))})
    # IMPALA-7023: These queries can linger and use up memory, causing subsequent
    # tests to hit memory limits. Wait for some time to allow the query to
    # be reclaimed.
    verifiers = [MetricVerifier(i.service)
                 for i in ImpalaCluster.get_e2e_test_cluster().impalads]
    for v in verifiers:
      v.wait_for_metric("impala-server.num-fragments-in-flight", 0, timeout=180)

  @UniqueDatabase.parametrize(sync_ddl=True)
  def test_insert_overwrite(self, vector, unique_database):
    self.run_test_case('QueryTest/insert_overwrite', vector, unique_database,
        multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1,
        test_file_vars={'$ORIGINAL_DB': ImpalaTestSuite
        .get_db_name_from_format(vector.get_value('table_format'))})

  @UniqueDatabase.parametrize(sync_ddl=True)
  def test_insert_bad_expr(self, vector, unique_database):
    # The test currently relies on codegen being disabled to trigger an error in
    # the output expression of the table sink.
    if vector.get_value('exec_option')['disable_codegen']:
      self.run_test_case('QueryTest/insert_bad_expr', vector, unique_database,
          multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1,
          test_file_vars={'$ORIGINAL_DB': ImpalaTestSuite
          .get_db_name_from_format(vector.get_value('table_format'))})

  @UniqueDatabase.parametrize(sync_ddl=True)
  def test_insert_random_partition(self, vector, unique_database):
    """Regression test for IMPALA-402: partitioning by rand() leads to strange behaviour
    or crashes."""
    self.run_test_case('QueryTest/insert-random-partition', vector, unique_database,
        multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1)


class TestInsertWideTable(ImpalaTestSuite):
  @classmethod
  def get_workload(self):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestInsertWideTable, cls).add_test_dimensions()

    # Only vary codegen
    cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
        cluster_sizes=[0], disable_codegen_options=[True, False], batch_sizes=[0]))

    # Inserts only supported on text and parquet
    # TODO: Enable 'text'/codec once the compressed text writers are in.
    cls.ImpalaTestMatrix.add_constraint(lambda v:\
        v.get_value('table_format').file_format == 'parquet' or \
        v.get_value('table_format').file_format == 'text')
    cls.ImpalaTestMatrix.add_constraint(lambda v:\
        v.get_value('table_format').compression_codec == 'none')

    # Don't run on core. This test is very slow (IMPALA-864) and we are unlikely to
    # regress here.
    if cls.exploration_strategy() == 'core':
      cls.ImpalaTestMatrix.add_constraint(lambda v: False);

  @SkipIfLocal.parquet_file_size
  def test_insert_wide_table(self, vector, unique_database):
    table_format = vector.get_value('table_format')

    # Text can't handle as many columns as Parquet (codegen takes forever)
    num_cols = 1000 if table_format.file_format == 'text' else 2000

    table_name = unique_database + ".insert_widetable"
    if vector.get_value('exec_option')['disable_codegen']:
      table_name += "_codegen_disabled"

    col_descs = widetable.get_columns(num_cols)
    create_stmt = "CREATE TABLE " + table_name + "(" + ','.join(col_descs) + ")"
    if vector.get_value('table_format').file_format == 'parquet':
      create_stmt += " stored as parquet"
    self.client.execute(create_stmt)

    # Get a single row of data
    col_vals = widetable.get_data(num_cols, 1, quote_strings=True)[0]
    insert_stmt = "INSERT INTO " + table_name + " VALUES(" + col_vals + ")"
    self.client.execute(insert_stmt)

    result = self.client.execute("select count(*) from " + table_name)
    assert result.data == ["1"]

    result = self.client.execute("select * from " + table_name)
    types = result.column_types
    labels = result.column_labels
    expected = QueryTestResult([col_vals], types, labels, order_matters=False)
    actual = QueryTestResult(parse_result_rows(result), types, labels, order_matters=False)
    assert expected == actual


class TestInsertPartKey(ImpalaTestSuite):
  """Regression test for IMPALA-875"""
  @classmethod
  def get_workload(self):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestInsertPartKey, cls).add_test_dimensions()
    # Only run for a single table type
    cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
        cluster_sizes=[0], disable_codegen_options=[False], batch_sizes=[0],
        sync_ddl=[1]))

    cls.ImpalaTestMatrix.add_constraint(lambda v:
        (v.get_value('table_format').file_format == 'text'))
    cls.ImpalaTestMatrix.add_constraint(lambda v:\
        v.get_value('table_format').compression_codec == 'none')

  @pytest.mark.execute_serially
  def test_insert_part_key(self, vector):
    """Test that partition column exprs are cast to the correct type. See IMPALA-875."""
    self.run_test_case('QueryTest/insert_part_key', vector,
        multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1)


class TestInsertNullQueries(ImpalaTestSuite):
  @classmethod
  def get_workload(self):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestInsertNullQueries, cls).add_test_dimensions()
    # Fix the exec_option vector to have a single value. This is needed should we decide
    # to run the insert tests in parallel (otherwise there will be two tests inserting
    # into the same table at the same time for the same file format).
    # TODO: When we do decide to run these tests in parallel we could create unique temp
    # tables for each test case to resolve the concurrency problems.
    cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
        cluster_sizes=[0], disable_codegen_options=[False], batch_sizes=[0]))

    # These tests only make sense for inserting into a text table with special
    # logic to handle all the possible ways NULL needs to be written as ascii
    cls.ImpalaTestMatrix.add_constraint(lambda v:\
        (v.get_value('table_format').file_format == 'text' and \
         v.get_value('table_format').compression_codec == 'none'))

  @classmethod
  def setup_class(cls):
    super(TestInsertNullQueries, cls).setup_class()

  def test_insert_null(self, vector, unique_database):
    self.run_test_case('QueryTest/insert_null', vector, unique_database,
        test_file_vars={'$ORIGINAL_DB': ImpalaTestSuite
        .get_db_name_from_format(vector.get_value('table_format'))})


class TestInsertFileExtension(ImpalaTestSuite):
  """Tests that files written to a table have the correct file extension. Asserts that
  Parquet files end with .parq and text files end with .txt."""

  @classmethod
  def get_workload(self):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension(
        'table_format_and_file_extension',
        *[('parquet', '.parq'), ('textfile', '.txt')]))

  @classmethod
  def setup_class(cls):
    super(TestInsertFileExtension, cls).setup_class()

  def test_file_extension(self, vector, unique_database):
    table_format = vector.get_value('table_format_and_file_extension')[0]
    file_extension = vector.get_value('table_format_and_file_extension')[1]
    table_name = "{0}_table".format(table_format)
    ctas_query = "create table {0}.{1} stored as {2} as select 1".format(
        unique_database, table_name, table_format)
    self.execute_query_expect_success(self.client, ctas_query)
    for path in self.filesystem_client.ls("test-warehouse/{0}.db/{1}".format(
        unique_database, table_name)):
      if not path.startswith('_'): assert path.endswith(file_extension)


class TestInsertHdfsWriterLimit(ImpalaTestSuite):
  """Test to make sure writer fragment instances are distributed evenly when using max
  hdfs_writers query option."""
  @classmethod
  def get_workload(self):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestInsertHdfsWriterLimit, cls).add_test_dimensions()
    cls.ImpalaTestMatrix.add_constraint(lambda v:
        (v.get_value('table_format').file_format == 'parquet'))

  @UniqueDatabase.parametrize(sync_ddl=True)
  @SkipIfNotHdfsMinicluster.tuned_for_minicluster
  def test_insert_writer_limit(self, unique_database):
    # Root internal (non-leaf) fragment.
    query = "create table {0}.test1 as select int_col from " \
            "functional_parquet.alltypes".format(unique_database)
    self.__run_insert_and_verify_instances(query, max_fs_writers=2, mt_dop=0,
        expected_num_instances_per_host=[1, 2, 2])
    # Root coordinator fragment.
    query = "create table {0}.test2 as select int_col from " \
            "functional_parquet.alltypes limit 100000".format(unique_database)
    self.__run_insert_and_verify_instances(query, max_fs_writers=2, mt_dop=0,
        expected_num_instances_per_host=[1, 1, 2])
    # Root scan fragment. Instance count within limit.
    query = "create table {0}.test3 as select int_col from " \
            "functional_parquet.alltypes".format(unique_database)
    self.__run_insert_and_verify_instances(query, max_fs_writers=4, mt_dop=0,
        expected_num_instances_per_host=[1, 1, 1])

  @UniqueDatabase.parametrize(sync_ddl=True)
  @SkipIfNotHdfsMinicluster.tuned_for_minicluster
  def test_mt_dop_writer_limit(self, unique_database):
    # Root internal (non-leaf) fragment.
    query = "create table {0}.test1 as select int_col from " \
            "functional_parquet.alltypes".format(unique_database)
    self.__run_insert_and_verify_instances(query, max_fs_writers=11, mt_dop=10,
        expected_num_instances_per_host=[11, 12, 12])
    # Root coordinator fragment.
    query = "create table {0}.test2 as select int_col from " \
            "functional_parquet.alltypes limit 100000".format(unique_database)
    self.__run_insert_and_verify_instances(query, max_fs_writers=2, mt_dop=10,
        expected_num_instances_per_host=[8, 8, 9])
    # Root scan fragment. Instance count within limit.
    query = "create table {0}.test3 as select int_col from " \
            "functional_parquet.alltypes".format(unique_database)
    self.__run_insert_and_verify_instances(query, max_fs_writers=30, mt_dop=10,
        expected_num_instances_per_host=[8, 8, 8])

  def __run_insert_and_verify_instances(self, query, max_fs_writers, mt_dop,
                                        expected_num_instances_per_host):
    self.client.set_configuration_option("max_fs_writers", max_fs_writers)
    self.client.set_configuration_option("mt_dop", mt_dop)
    # Test depends on both planner and scheduler to see the same state of the cluster
    # having 3 executors, so to reduce flakiness we make sure all 3 executors are up
    # and running.
    self.impalad_test_service.wait_for_metric_value("cluster-membership.backends.total",
        3)
    result = self.client.execute(query)
    assert 'HDFS WRITER' in result.exec_summary[0]['operator'], result.runtime_profile
    assert int(result.exec_summary[0]['num_instances']) <= int(
        max_fs_writers), result.runtime_profile
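    # The runtime profile reports the per-host fragment instance counts in
    # parentheses; extract the three counts (one per executor in the minicluster)
    # and compare them, ignoring order, against the expected distribution.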
    regex = r'Per Host Number of Fragment Instances' \
            r':.*?\((.*?)\).*?\((.*?)\).*?\((.*?)\).*?\n'
    matches = re.findall(regex, result.runtime_profile)
    assert len(matches) == 1 and len(matches[0]) == 3, result.runtime_profile
    num_instances_per_host = [int(i) for i in matches[0]]
    num_instances_per_host.sort()
    expected_num_instances_per_host.sort()
    assert num_instances_per_host == expected_num_instances_per_host, \
        result.runtime_profile
    self.client.clear_configuration()