Files
impala/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
Tim Armstrong d7246d64c7 IMPALA-1430,IMPALA-4108: codegen all builtin aggregate functions
This change enables codegen for all builtin aggregate functions,
e.g. timestamp functions and group_concat.

There are several parts to the change:
* Adding support for generic UDAs. Previously, the codegen code did not
  handle multiple input arguments or NULL return values.
* Defaulting to using the UDA interface when there is not a special
  codegen path (we have implementations of all builtin aggregate
  functions for the interpreted path).
* Removing all the logic that disabled codegen for the special cases that
  are now supported.

Also fix the generation of code to get/set NULL bits since I needed
to add functionality there anyway.

Testing:
Add tests that check that codegen was enabled for builtin aggregate
functions. Also fix some gaps in the preexisting tests.

Also add tests for UDAs that check input/output nulls are handled
correctly, in anticipation of enabling codegen for arbitrary UDAs.
The tests are run with both codegen enabled and disabled. To avoid
flaky tests, we switch the UDF tests to use "unique_database".

Perf:
Ran local TPC-H and targeted perf. Spent a lot of time on TPC-H Q1,
since my original approach regressed it ~5%. In the end the problem was
to do with the ordering of loads/stores to the slot and null bit in the
generated code: the previous version of the code exploited some
properties of the particular aggregate function. I ended up replicating
this behaviour to avoid regressing perf.

Change-Id: Id9dc21d1d676505d3617e1e4f37557397c4fb260
Reviewed-on: http://gerrit.cloudera.org:8080/4655
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Internal Jenkins
2016-11-09 03:27:12 +00:00

316 lines
5.8 KiB
Plaintext

====
---- QUERY
# Zero-argument Java UDF; the expected value is pi as a DOUBLE.
SELECT hive_pi()
---- TYPES
DOUBLE
---- RESULTS
3.141592653589793
====
---- QUERY
# hive_bin(100) is expected to yield the binary-digit string of 100.
SELECT hive_bin(100)
---- TYPES
STRING
---- RESULTS
'1100100'
====
---- QUERY
# The UDF result is fed into a builtin aggregate over a table scan.
SELECT min(hive_pi())
FROM functional.alltypesagg
---- TYPES
DOUBLE
---- RESULTS
3.141592653589793
====
---- QUERY
# Test identity functions. Each case in this group passes a single value of one
# scalar type to identity() and expects the same value and type back.
# BOOLEAN argument.
SELECT identity(TRUE);
---- TYPES
BOOLEAN
---- RESULTS
true
====
---- QUERY
# TINYINT argument.
SELECT identity(CAST(10 AS TINYINT));
---- TYPES
TINYINT
---- RESULTS
10
====
---- QUERY
# SMALLINT argument.
SELECT identity(CAST(10 AS SMALLINT));
---- TYPES
SMALLINT
---- RESULTS
10
====
---- QUERY
# INT argument.
SELECT identity(CAST(10 AS INT));
---- TYPES
INT
---- RESULTS
10
====
---- QUERY
# BIGINT argument.
SELECT identity(CAST(10 AS BIGINT));
---- TYPES
BIGINT
---- RESULTS
10
====
---- QUERY
# FLOAT argument.
SELECT identity(CAST(10.0 AS FLOAT));
---- TYPES
FLOAT
---- RESULTS
10
====
---- QUERY
# DOUBLE argument.
SELECT identity(CAST(10.0 AS DOUBLE));
---- TYPES
DOUBLE
---- RESULTS
10
====
---- QUERY
# IMPALA-1456. Every identity() call in this query exercises a different Java string
# type (BytesWritable, Text, and String). With several arguments the expected output
# is the arguments joined together.
SELECT
  identity("why hello there"),
  identity("why", " hello there"),
  identity("why", " hello", " there");
---- TYPES
STRING, STRING, STRING
---- RESULTS
'why hello there','why hello there','why hello there'
====
---- QUERY
# A NULL argument is expected to come back as NULL, typed as BOOLEAN.
SELECT identity(NULL);
---- TYPES
BOOLEAN
---- RESULTS
NULL
====
---- QUERY
# IMPALA-1134. Every identity() call in this query exercises a different Java string
# type (BytesWritable, Text, and String); the types are handled slightly differently.
# length() checks that the returned strings have the expected sizes.
SELECT
  length(identity("0123456789")),
  length(identity("0123456789", "0123456789")),
  length(identity("0123456789", "0123456789", "0123456789"));
---- TYPES
INT, INT, INT
---- RESULTS
10,20,30
====
---- QUERY
# IMPALA-1392: a Hive UDF that throws an exception should yield NULL rather than
# failing the query. Constant (no FROM clause) case.
SELECT throws_exception();
---- TYPES
BOOLEAN
---- RESULTS
NULL
====
---- QUERY
# Same UDF evaluated over a table scan: one NULL is expected per returned row.
SELECT throws_exception()
FROM functional.alltypestiny;
---- TYPES
BOOLEAN
---- RESULTS
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
====
---- QUERY
# hive_add() with two INT arguments.
SELECT hive_add(CAST(1 AS INT), CAST(2 AS INT));
---- TYPES
INT
---- RESULTS
3
====
---- QUERY
# A hive_add() call used as an argument of another hive_add() call.
SELECT hive_add(
  hive_add(CAST(1 AS INT), CAST(2 AS INT)),
  CAST(2 AS INT));
---- TYPES
INT
---- RESULTS
5
====
---- QUERY
# Two hive_add() results combined with builtin arithmetic, cast back to INT and
# passed to an outer hive_add() call.
SELECT hive_add(
  CAST(
    hive_add(CAST(1 AS INT), CAST(2 AS INT)) - hive_add(CAST(2 AS INT), CAST(1 AS INT))
    AS INT),
  CAST(2 AS INT));
---- TYPES
INT
---- RESULTS
2
====
---- QUERY
# SMALLINT arguments.
SELECT hive_add(CAST(1 AS SMALLINT), CAST(2 AS SMALLINT));
---- TYPES
SMALLINT
---- RESULTS
3
====
---- QUERY
# FLOAT arguments.
SELECT hive_add(CAST(1.0 AS FLOAT), CAST(2.0 AS FLOAT));
---- TYPES
FLOAT
---- RESULTS
3.0
====
---- QUERY
# DOUBLE arguments.
SELECT hive_add(CAST(1.0 AS DOUBLE), CAST(2.0 AS DOUBLE));
---- TYPES
DOUBLE
---- RESULTS
3.0
====
---- QUERY
# BOOLEAN arguments: a true and a false input are expected to give false.
SELECT hive_add(CAST(1 AS BOOLEAN), CAST(0 AS BOOLEAN));
---- TYPES
BOOLEAN
---- RESULTS
false
====
---- QUERY
# Check that all of the persistent Java UDFs are accessible. Each case in this group
# calls identity_anytype() with a different argument type. BOOLEAN argument.
SELECT identity_anytype(TRUE);
---- TYPES
BOOLEAN
---- RESULTS
true
====
---- QUERY
# TINYINT argument.
SELECT identity_anytype(CAST(10 AS TINYINT));
---- TYPES
TINYINT
---- RESULTS
10
====
---- QUERY
# SMALLINT argument.
SELECT identity_anytype(CAST(10 AS SMALLINT));
---- TYPES
SMALLINT
---- RESULTS
10
====
---- QUERY
# INT argument.
SELECT identity_anytype(CAST(10 AS INT));
---- TYPES
INT
---- RESULTS
10
====
---- QUERY
# BIGINT argument.
SELECT identity_anytype(CAST(10 AS BIGINT));
---- TYPES
BIGINT
---- RESULTS
10
====
---- QUERY
# FLOAT argument.
SELECT identity_anytype(CAST(10.0 AS FLOAT));
---- TYPES
FLOAT
---- RESULTS
10
====
---- QUERY
# DOUBLE argument.
SELECT identity_anytype(CAST(10.0 AS DOUBLE));
---- TYPES
DOUBLE
---- RESULTS
10
====
---- QUERY
# Two string arguments; the expected output is the two strings joined together.
SELECT identity_anytype("a", "b");
---- TYPES
STRING
---- RESULTS
'ab'
====
---- QUERY
# Three string arguments; the expected output is the three strings joined together.
SELECT identity_anytype("a", "b", "c");
---- TYPES
STRING
---- RESULTS
'abc'
====
---- QUERY
# IMPALA-3378: test many Java UDFs being opened and run concurrently.
# Sixteen nested UNION ALL branches each filter functional.alltypesagg through
# identity() on a different column/threshold and return max(int_col), so sixteen
# result rows are expected.
SELECT *
FROM
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(bool_col)
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(tinyint_col) > 1
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(smallint_col) > 1
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(int_col) > 1
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(bigint_col) > 1
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(float_col) > 1.0
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(double_col) > 1.0
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(string_col) > '1'
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE NOT identity(bool_col)
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(tinyint_col) > 2
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(smallint_col) > 2
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(int_col) > 2
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(bigint_col) > 2
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(float_col) > 2.0
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(double_col) > 2.0
 UNION ALL
(SELECT max(int_col) FROM functional.alltypesagg
 WHERE identity(string_col) > '2'
)))))))))))))))) v
---- TYPES
INT
---- RESULTS
998
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
====
---- QUERY
# Remove any leftover copy of the input table before it is recreated below.
DROP TABLE IF EXISTS replace_string_input
====
---- QUERY
# Input strings for the replace_string() test; 'scone' and 'stuff' appear twice.
CREATE TABLE replace_string_input AS
VALUES ('toast'), ('scone'), ('stuff'), ('sssss'), ('yes'), ('scone'), ('stuff');
====
---- QUERY
# Regression test for IMPALA-4266: memory management bugs with output strings from
# Java UDFs, exposed by using the UDF as a grouping key in an aggregation.
# The UDF replaces "s" with "ss" in the strings.
SELECT DISTINCT replace_string(_c0) AS es
FROM replace_string_input
ORDER BY 1;
---- TYPES
STRING
---- RESULTS
'sscone'
'ssssssssss'
'sstuff'
'toasst'
'yess'
====