IMPALA-1430,IMPALA-4108: codegen all builtin aggregate functions

This change enables codegen for all builtin aggregate functions,
e.g. timestamp functions and group_concat.

There are several parts to the change:
* Adding support for generic UDAs. Previous the codegen code did not
  handle multiple input arguments or NULL return values.
* Defaulting to using the UDA interface when there is not a special
  codegen path (we have implementations of all builtin aggregate
  functions for the interpreted path).
* Remove all the logic to disable codegen for the special cases that now
  are supported.

Also fix the generation of code to get/set NULL bits since I needed
to add functionality there anyway.

Testing:
Add tests that check that codegen was enabled for builtin aggregate
functions. Also fix some gaps in the preexisting tests.

Also add tests for UDAs that check input/output nulls are handled
correctly, in anticipation of enabling codegen for arbitrary UDAs.
The tests are run with both codegen enabled and disabled. To avoid
flaky tests, we switch the UDF tests to use "unique_database".

Perf:
Ran local TPC-H and targeted perf. Spent a lot of time on TPC-H Q1,
since my original approach regressed it ~5%. In the end the problem was
to do with the ordering of loads/stores to the slot and null bit in the
generated code: the previous version of the code exploited some
properties of the particular aggregate function. I ended up replicating
this behaviour to avoid regressing perf.

Change-Id: Id9dc21d1d676505d3617e1e4f37557397c4fb260
Reviewed-on: http://gerrit.cloudera.org:8080/4655
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Internal Jenkins
This commit is contained in:
Tim Armstrong
2016-09-28 13:04:07 -07:00
committed by Internal Jenkins
parent 6775893894
commit d7246d64c7
47 changed files with 1039 additions and 802 deletions

View File

@@ -1,23 +1,19 @@
====
---- QUERY
create database if not exists udf_test_errors;
---- RESULTS
====
---- QUERY
create function if not exists udf_test_errors.hive_pi() returns double
create function if not exists hive_pi() returns double
location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar'
symbol='org.apache.hadoop.hive.ql.udf.UDFPI';
---- RESULTS
====
---- QUERY
create function if not exists udf_test_errors.foo() returns double
create function if not exists foo() returns double
location '$FILESYSTEM_PREFIX/test-warehouse/not-a-real-file.so'
symbol='FnDoesNotExist';
---- CATCH
Could not load binary: $FILESYSTEM_PREFIX/test-warehouse/not-a-real-file.so
====
---- QUERY
create function if not exists udf_test_errors.foo() returns double
create function if not exists foo() returns double
location '$FILESYSTEM_PREFIX/test-warehouse/not-a-real-file.so'
symbol='FnDoesNotExist';
---- CATCH
@@ -25,7 +21,7 @@ Could not load binary: $FILESYSTEM_PREFIX/test-warehouse/not-a-real-file.so
====
---- QUERY
# This test is run with codegen disabled. Interpretation only handles up to 20 arguments.
create function if not exists udf_test_errors.twenty_args(int, int, int, int, int, int,
create function if not exists twenty_args(int, int, int, int, int, int,
int, int, int, int, int, int, int, int, int, int, int, int, int, int) returns int
location '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so'
symbol='TwentyArgs';
@@ -33,7 +29,7 @@ symbol='TwentyArgs';
====
---- QUERY
# Verifies that interpretation can support up to 20 arguments
select udf_test_errors.twenty_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20);
select twenty_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20);
---- TYPES
INT
---- RESULTS
@@ -41,7 +37,7 @@ INT
====
---- QUERY
# This test is run with codegen disabled. Interpretation only handles up to 20 arguments.
create function if not exists udf_test_errors.twenty_one_args(int, int, int, int, int, int,
create function if not exists twenty_one_args(int, int, int, int, int, int,
int, int, int, int, int, int, int, int, int, int, int, int, int, int, int) returns int
location '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so'
symbol='TwentyOneArgs';
@@ -49,35 +45,36 @@ symbol='TwentyOneArgs';
====
---- QUERY
# Verifies that interpretation fails with more than 20 arguments.
select udf_test_errors.twenty_one_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21);
select twenty_one_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21);
---- CATCH
Cannot interpret native UDF 'twenty_one_args': number of arguments is more than 20. Codegen is needed. Please set DISABLE_CODEGEN to false.
====
---- QUERY
# This test is run with codegen disabled. IR UDF will fail.
create function if not exists udf_test_errors.nine_args_ir(int, int, int, int, int, int,
create function if not exists nine_args_ir(int, int, int, int, int, int,
int, int, int) returns int
location '$FILESYSTEM_PREFIX/test-warehouse/test-udfs.ll'
symbol='NineArgs';
---- RESULTS
====
---- QUERY
select udf_test_errors.nine_args_ir(1,2,3,4,5,6,7,8,9);
select nine_args_ir(1,2,3,4,5,6,7,8,9);
---- CATCH
Cannot interpret LLVM IR UDF 'nine_args_ir': Codegen is needed. Please set DISABLE_CODEGEN to false.
====
---- QUERY
drop database udf_test_errors;
use default;
drop database $DATABASE;
---- CATCH
Cannot drop non-empty database: udf_test_errors
Cannot drop non-empty database:
====
---- QUERY
drop function udf_test_errors.hive_pi();
drop function udf_test_errors.twenty_args(int, int, int, int, int, int, int, int,
use $DATABASE;
drop function hive_pi();
drop function twenty_args(int, int, int, int, int, int, int, int,
int, int, int, int, int, int, int, int, int, int, int, int);
drop function udf_test_errors.twenty_one_args(int, int, int, int, int, int, int, int,
drop function twenty_one_args(int, int, int, int, int, int, int, int,
int, int, int, int, int, int, int, int, int, int, int, int, int);
drop function udf_test_errors.nine_args_ir(int, int, int, int, int, int, int, int, int);
drop database udf_test_errors;
drop function nine_args_ir(int, int, int, int, int, int, int, int, int);
---- RESULTS
====