mirror of
https://github.com/apache/impala.git
synced 2025-12-30 12:02:10 -05:00
This change enables codegen for all builtin aggregate functions, e.g. timestamp functions and group_concat. There are several parts to the change: * Adding support for generic UDAs. Previous the codegen code did not handle multiple input arguments or NULL return values. * Defaulting to using the UDA interface when there is not a special codegen path (we have implementations of all builtin aggregate functions for the interpreted path). * Remove all the logic to disable codegen for the special cases that now are supported. Also fix the generation of code to get/set NULL bits since I needed to add functionality there anyway. Testing: Add tests that check that codegen was enabled for builtin aggregate functions. Also fix some gaps in the preexisting tests. Also add tests for UDAs that check input/output nulls are handled correctly, in anticipation of enabling codegen for arbitrary UDAs. The tests are run with both codegen enabled and disabled. To avoid flaky tests, we switch the UDF tests to use "unique_database". Perf: Ran local TPC-H and targeted perf. Spent a lot of time on TPC-H Q1, since my original approach regressed it ~5%. In the end the problem was to do with the ordering of loads/stores to the slot and null bit in the generated code: the previous version of the code exploited some properties of the particular aggregate function. I ended up replicating this behaviour to avoid regressing perf. Change-Id: Id9dc21d1d676505d3617e1e4f37557397c4fb260 Reviewed-on: http://gerrit.cloudera.org:8080/4655 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Internal Jenkins
100 lines
3.7 KiB
Plaintext
100 lines
3.7 KiB
Plaintext
====
|
|
---- QUERY
|
|
create function hive_pi() returns double
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar'
|
|
symbol='org.apache.hadoop.hive.ql.udf.UDFPI';
|
|
|
|
create function hive_round(double) returns double
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar'
|
|
symbol='org.apache.hadoop.hive.ql.udf.UDFRound';
|
|
|
|
create function hive_floor(double) returns bigint
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar'
|
|
symbol='org.apache.hadoop.hive.ql.udf.UDFFloor';
|
|
|
|
create function hive_mod(int, int) returns int
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar'
|
|
symbol='org.apache.hadoop.hive.ql.udf.UDFPosMod';
|
|
|
|
create function hive_bin(bigint) returns string
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar'
|
|
symbol='org.apache.hadoop.hive.ql.udf.UDFBin';
|
|
|
|
create function hive_lower(string) returns string
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar'
|
|
symbol='org.apache.hadoop.hive.ql.udf.UDFLower';
|
|
|
|
# Used to test persistent java functions
|
|
create function identity_anytype
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function identity(boolean) returns boolean
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function identity(tinyint) returns tinyint
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function identity(smallint) returns smallint
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function identity(int) returns int
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function identity(bigint) returns bigint
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function identity(float) returns float
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function identity(double) returns double
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function identity(string) returns string
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function identity(string, string) returns string
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function identity(string, string, string) returns string
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function hive_add(int, int) returns int
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function hive_add(smallint, smallint) returns smallint
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function hive_add(float, float) returns float
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function hive_add(double, double) returns double
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function hive_add(boolean, boolean) returns boolean
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdf';
|
|
|
|
create function throws_exception() returns boolean
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.TestUdfException';
|
|
|
|
create function replace_string(string) returns string
|
|
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
|
|
symbol='org.apache.impala.ReplaceStringUdf';
|
|
====
|