mirror of
https://github.com/apache/impala.git
synced 2026-01-10 00:00:16 -05:00
Currently if the materialization of any column cannot be codegen'd
because its type is unsupported (e.g. CHAR(N)), the whole codegen is
cancelled for the text scanner.
This commit adds the function TextConverter::SupportsCodegenWriteSlot
that returns whether the given ColumnType is supported. If the type is
not supported, HdfsScanner codegens code that calls the interpreted
version instead of failing codegen. For other columns codegen is used as
usual.
Benchmarks:
Copied and modified a TPCH table with scale factor 5 to add a CHAR
column to it:
USE tpch5;
CREATE TABLE IF NOT EXISTS lineitem_char AS
SELECT *, CAST(l_shipdate AS CHAR(10)) l_shipdate_char
FROM lineitem;
Ran the following query 100 times after one warm-up run, with and
without this change:
SELECT *
FROM tpch5.lineitem_char
WHERE
l_partkey BETWEEN 500 AND 500000 AND
l_linestatus = 'F' AND
l_quantity < 35 AND
l_extendedprice BETWEEN 2000 AND 8000 AND
l_discount > 0 AND
l_tax BETWEEN 0.04 AND 0.06 AND
l_returnflag IN ('A', 'N') AND
l_shipdate_char < '1996-06-20'
ORDER BY l_shipdate_char
LIMIT 10;
Without this commit: mean: 2.92, standard deviation: 0.13.
With this commit: mean: 2.21, standard deviation: 0.072.
Testing:
The interesting cases regarding char are covered in
0167c5b424/testdata/workloads/functional-query/queries/QueryTest/chars.test
Change-Id: Id370193af578ecf23ed3c6bfcc65fec448156fa3
Reviewed-on: http://gerrit.cloudera.org:8080/16059
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
43 lines
1.0 KiB
Plaintext
43 lines
1.0 KiB
Plaintext
====
|
|
---- QUERY
|
|
set disable_codegen_rows_threshold=0;
|
|
select count(*) from alltypes t1
|
|
join /* +SHUFFLE */ alltypes t2
|
|
on t1.int_col= t2.int_col and
|
|
t1.string_col = t2.string_col
|
|
---- RESULTS
|
|
5329000
|
|
---- TYPES
|
|
bigint
|
|
---- RUNTIME_PROFILE
|
|
# Verify that codegen was enabled for the hash-partitioned sender
|
|
row_regex: .*Hash Partitioned Sender Codegen Enabled.*
|
|
====
|
|
---- QUERY
|
|
set disable_codegen_rows_threshold=0;
|
|
select count(*) from alltypes t1
|
|
join /* +BROADCAST */ alltypes t2
|
|
on t1.int_col= t2.int_col and
|
|
t1.string_col = t2.string_col
|
|
---- RESULTS
|
|
5329000
|
|
---- TYPES
|
|
bigint
|
|
---- RUNTIME_PROFILE
|
|
# Verify that codegen was disabled for the unpartitioned sender because it is not needed
|
|
row_regex: .*Unpartitioned Sender Codegen Disabled: not needed.*
|
|
====
|
|
---- QUERY
|
|
set disable_codegen_rows_threshold=0;
|
|
select count(*) from chars_tiny t1
|
|
join /* +SHUFFLE */ chars_tiny t2 on t1.cs=t2.cs;
|
|
---- RESULTS
|
|
10
|
|
---- TYPES
|
|
bigint
|
|
---- RUNTIME_PROFILE
|
|
# Verify that CHAR codegen was enabled for hash partitioning even though CHAR
|
|
# codegen isn't supported everywhere.
|
|
row_regex: .*Hash Partitioned Sender Codegen Enabled.*
|
|
====
|