mirror of
https://github.com/apache/impala.git
synced 2026-01-01 18:00:30 -05:00
This change codegens the hash partitioning logic of
KrpcDataStreamSender::Send() when the partitioning strategy is
HASH_PARTITIONED. It does so by unrolling the loop which evaluates each row
against the partitioning expressions and hashes the result. It also replaces
the number of channels of that sender with a constant at runtime.

With this change, we get a reasonable speedup on some benchmarks:

+------------+-----------------------+---------+------------+------------+----------------+
| Workload   | File Format           | Avg (s) | Delta(Avg) | GeoMean(s) | Delta(GeoMean) |
+------------+-----------------------+---------+------------+------------+----------------+
| TPCH(_300) | parquet / none / none | 20.03   | -6.44%     | 13.56      | -7.15%         |
+------------+-----------------------+---------+------------+------------+----------------+

+---------------------+-----------------------+---------+------------+------------+----------------+
| Workload            | File Format           | Avg (s) | Delta(Avg) | GeoMean(s) | Delta(GeoMean) |
+---------------------+-----------------------+---------+------------+------------+----------------+
| TARGETED-PERF(_300) | parquet / none / none | 58.59   | -5.56%     | 12.28      | -5.30%         |
+---------------------+-----------------------+---------+------------+------------+----------------+

+-------------------------+-----------------------+---------+------------+------------+----------------+
| Workload                | File Format           | Avg (s) | Delta(Avg) | GeoMean(s) | Delta(GeoMean) |
+-------------------------+-----------------------+---------+------------+------------+----------------+
| TPCDS-UNMODIFIED(_1000) | parquet / none / none | 15.60   | -3.10%     | 7.16       | -4.33%         |
+-------------------------+-----------------------+---------+------------+------------+----------------+

+-------------------+-----------------------+---------+------------+------------+----------------+
| Workload          | File Format           | Avg (s) | Delta(Avg) | GeoMean(s) | Delta(GeoMean) |
+-------------------+-----------------------+---------+------------+------------+----------------+
| TPCH_NESTED(_300) | parquet / none / none | 30.93   | -3.02%     | 17.46      | -4.71%         |
+-------------------+-----------------------+---------+------------+------------+----------------+

Change-Id: I1c44cc9312c062cc7a5a4ac9156ceaa31fb887ff
Reviewed-on: http://gerrit.cloudera.org:8080/10421
Reviewed-by: Michael Ho <kwho@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
59 lines
2.5 KiB
Python
59 lines
2.5 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
# Tests end-to-end codegen behaviour.
|
|
|
|
from tests.common.impala_test_suite import ImpalaTestSuite
|
|
from tests.common.skip import SkipIf
|
|
from tests.common.test_dimensions import create_exec_option_dimension_from_dict
|
|
from tests.common.test_result_verifier import get_node_exec_options,\
|
|
assert_codegen_enabled
|
|
|
|
class TestCodegen(ImpalaTestSuite):
  """End-to-end tests for codegen behaviour.

  The tests run against the 'functional-query' workload and are constrained to
  uncompressed text tables (see add_test_dimensions()).
  """

  @classmethod
  def get_workload(cls):
    """Return the name of the workload these tests run against."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Extend the inherited test matrix for the codegen tests.

    Adds an exec-option dimension that pins 'exec_single_node_rows_threshold' to 0
    and constrains the matrix to the text/none table format.
    """
    super(TestCodegen, cls).add_test_dimensions()
    # NOTE(review): a threshold of 0 presumably stops the planner from running small
    # queries on a single node, which would otherwise mask codegen -- confirm.
    cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension_from_dict({
        'exec_single_node_rows_threshold': [0]}))
    # No need to run this on all file formats. Run it on text/none, which has stats
    # computed.
    cls.ImpalaTestMatrix.add_constraint(
        lambda v: v.get_value('table_format').file_format == 'text' and
        v.get_value('table_format').compression_codec == 'none')

  def test_disable_codegen(self, vector):
    """Test that codegen is enabled/disabled by the planner as expected."""
    self.run_test_case('QueryTest/disable-codegen', vector)

  def test_select_node_codegen(self, vector):
    """Test that select node is codegened"""
    result = self.execute_query('select * from (select * from functional.alltypes '
                                'limit 1000000) t1 where int_col > 10 limit 10')
    # NOTE(review): node id 1 is presumably the select node of this plan -- confirm.
    exec_options = get_node_exec_options(result.runtime_profile, 1)
    # Make sure test fails if there are no exec options in the profile for the node
    assert len(exec_options) > 0
    assert_codegen_enabled(result.runtime_profile, [1])

  @SkipIf.not_krpc
  def test_datastream_sender_codegen(self, vector):
    """Test the KrpcDataStreamSender's codegen logic"""
    self.run_test_case('QueryTest/datastream-sender-codegen', vector)
|