Files
impala/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
Steve Carlin ca5ea4aeab IMPALA-11162: Support GenericUDFs for Hive
Hive has 2 types of UDFs. This commit contains limited
support for the second generation UDFs called GenericUDFs.

The main limitations are as follows:

Decimal types are not supported.  The Impala framework determines
the precision and scale of the decimal return type. However, the
Hive GenericUDFs allow the capability to choose its own return
type based on the parameters. Until this can be resolved, it is
safer to forbid decimals from being used. Note that this
limitation currently exists in the first generation of Hive Java
UDFs.

Complex types are not supported.

Functions are not extracted from the jar file. The first generation
of Hive UDFs allowed this because the method prototypes are
explicitly defined and can be determined at function creation time. For
GenericUDFs, the return types are determined based on the parameters
passed in when running a query.

For the same reason as above, GenericUDFs cannot be made permanent.
They will need to be recreated everytime the server is restarted.
This is a severe limitation and will be resolved in the near future.

Change-Id: Ie6fd09120db413fade94410c83ebe8ff104013cd
Reviewed-on: http://gerrit.cloudera.org:8080/18295
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Csaba Ringhofer <csringhofer@cloudera.com>
2022-05-11 15:10:28 +00:00

356 lines
8.0 KiB
Plaintext

====
---- QUERY
select hive_pi()
---- RESULTS
3.141592653589793
---- TYPES
DOUBLE
====
---- QUERY
select hive_bin(100)
---- RESULTS
'1100100'
---- TYPES
STRING
====
---- QUERY
select min(hive_pi()) from functional.alltypesagg
---- RESULTS
3.141592653589793
---- TYPES
DOUBLE
====
---- QUERY
# Test identity functions
select identity(true), identity(cast(NULL as boolean));
---- TYPES
boolean, boolean
---- RESULTS
true,NULL
====
---- QUERY
select identity(cast(10 as tinyint)), identity(cast(NULL as tinyint));
---- TYPES
tinyint, tinyint
---- RESULTS
10,NULL
====
---- QUERY
select identity(cast(10 as smallint)), identity(cast(NULL as smallint));
---- TYPES
smallint, smallint
---- RESULTS
10,NULL
====
---- QUERY
select identity(cast(10 as int)), identity(cast(NULL as int));
---- TYPES
int, int
---- RESULTS
10,NULL
====
---- QUERY
select identity(cast(10 as bigint)), identity(cast(NULL as bigint));
---- TYPES
bigint, bigint
---- RESULTS
10,NULL
====
---- QUERY
select identity(cast(10.0 as float)), identity(cast(NULL as float));
---- TYPES
float, float
---- RESULTS
10,NULL
====
---- QUERY
select identity(cast(10.0 as double)), identity(cast(NULL as double));
---- TYPES
double, double
---- RESULTS
10,NULL
====
---- QUERY
# IMPALA-1456. Each "identity" call below tests a different type (BytesWritable, Text,
# and String).
select identity("why hello there"),
identity("why", " hello there"),
identity("why", " hello", " there"),
identity(cast(NULL as string)),
identity(cast(NULL as string), cast(NULL as string)),
identity(cast(NULL as string), cast(NULL as string), cast(NULL as string));
---- TYPES
string, string, string, string, string, string
---- RESULTS
'why hello there','why hello there','why hello there','NULL','NULL','NULL'
====
---- QUERY
# IMPALA-1134. Each "identity" call below tests a different type (BytesWritable, Text,
# and String). The different types are handled slightly differently.
select length(identity("0123456789")),
length(identity("0123456789", "0123456789")),
length(identity("0123456789", "0123456789", "0123456789"));
---- TYPES
int, int, int
---- RESULTS
10,20,30
====
---- QUERY
# IMPALA-1392: Hive UDFs that throw exceptions should return NULL
select throws_exception();
---- TYPES
boolean
---- RESULTS
NULL
====
---- QUERY
set abort_java_udf_on_exception=true;
select throws_exception() from functional.alltypestiny;
---- CATCH
Test exception
====
---- QUERY
select throws_exception() from functional.alltypestiny;
---- TYPES
boolean
---- RESULTS
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
====
---- QUERY
select hive_add(cast(1 as int), cast(2 as int));
---- TYPES
int
---- RESULTS
3
====
---- QUERY
select hive_add(hive_add(cast(1 as int), cast(2 as int)), cast(2 as int));
---- TYPES
int
---- RESULTS
5
====
---- QUERY
select hive_add(cast(hive_add(cast(1 as int), cast(2 as int)) - hive_add(cast(2 as int), cast(1 as int)) as int), cast(2 as int));
---- TYPES
int
---- RESULTS
2
====
---- QUERY
select hive_add(cast(1 as smallint), cast(2 as smallint));
---- TYPES
smallint
---- RESULTS
3
====
---- QUERY
select hive_add(cast(1.0 as float), cast(2.0 as float));
---- TYPES
float
---- RESULTS
3.0
====
---- QUERY
select hive_add(cast(1.0 as double), cast(2.0 as double));
---- TYPES
double
---- RESULTS
3.0
====
---- QUERY
select hive_add(cast(1 as boolean), cast(0 as boolean));
---- TYPES
boolean
---- RESULTS
false
====
---- QUERY
# Testing whether all of persistent Java udfs are accessible.
select identity_anytype(true);
---- TYPES
boolean
---- RESULTS
true
====
---- QUERY
select identity_anytype(cast(10 as tinyint));
---- TYPES
tinyint
---- RESULTS
10
====
---- QUERY
select identity_anytype(cast(10 as smallint));
---- TYPES
smallint
---- RESULTS
10
====
---- QUERY
select identity_anytype(cast(10 as int));
---- TYPES
int
---- RESULTS
10
====
---- QUERY
select identity_anytype(cast(10 as bigint));
---- TYPES
bigint
---- RESULTS
10
====
---- QUERY
select identity_anytype(cast(10.0 as float));
---- TYPES
float
---- RESULTS
10
====
---- QUERY
select identity_anytype(cast(10.0 as double));
---- TYPES
double
---- RESULTS
10
====
---- QUERY
select identity_anytype("a", "b");
---- TYPES
string
---- RESULTS
'ab'
====
---- QUERY
select identity_anytype("a", "b", "c");
---- TYPES
string
---- RESULTS
'abc'
====
---- QUERY
# IMPALA-3378: test many Java UDFs being opened and run concurrently
select * from
(select max(int_col) from functional.alltypesagg
where identity(bool_col) union all
(select max(int_col) from functional.alltypesagg
where identity(tinyint_col) > 1 union all
(select max(int_col) from functional.alltypesagg
where identity(smallint_col) > 1 union all
(select max(int_col) from functional.alltypesagg
where identity(int_col) > 1 union all
(select max(int_col) from functional.alltypesagg
where identity(bigint_col) > 1 union all
(select max(int_col) from functional.alltypesagg
where identity(float_col) > 1.0 union all
(select max(int_col) from functional.alltypesagg
where identity(double_col) > 1.0 union all
(select max(int_col) from functional.alltypesagg
where identity(string_col) > '1' union all
(select max(int_col) from functional.alltypesagg
where not identity(bool_col) union all
(select max(int_col) from functional.alltypesagg
where identity(tinyint_col) > 2 union all
(select max(int_col) from functional.alltypesagg
where identity(smallint_col) > 2 union all
(select max(int_col) from functional.alltypesagg
where identity(int_col) > 2 union all
(select max(int_col) from functional.alltypesagg
where identity(bigint_col) > 2 union all
(select max(int_col) from functional.alltypesagg
where identity(float_col) > 2.0 union all
(select max(int_col) from functional.alltypesagg
where identity(double_col) > 2.0 union all
(select max(int_col) from functional.alltypesagg
where identity(string_col) > '2'
)))))))))))))))) v
---- TYPES
INT
---- RESULTS
998
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
====
---- QUERY
drop table if exists replace_string_input
====
---- QUERY
create table replace_string_input as
values('toast'), ('scone'), ('stuff'), ('sssss'), ('yes'), ('scone'), ('stuff');
====
---- QUERY
# Regression test for IMPALA-4266: memory management bugs with output strings from
# Java UDFS, exposed by using the UDF as a grouping key in an aggregation.
# The UDF replaces "s" with "ss" in the strings.
select distinct replace_string(_c0) as es
from replace_string_input
order by 1;
---- TYPES
string
---- RESULTS
'sscone'
'ssssssssss'
'sstuff'
'toasst'
'yess'
====
---- QUERY
# Regression test for IMPALA-8016; this UDF loads another class in the same jar.
select import_nearby_classes("placeholder");
---- TYPES
string
---- RESULTS
'Hello'
====
---- QUERY
# In the interpreted code path of HiveUdfCall, we use ScalarExprEvaluator::GetValue which
# evaluates the child expression and returns a nullptr if it is NULL. But in the case of
# TYPE_DATE it also returns nullptr if the date is invalid, so the interpreted path would
# handle invalid dates as nulls. Java UDFs involving Date are not allowed yet, this test
# is a reminder that we'll have to handle this case in codegen when we add support for Date.
create function identity(Date) returns Date
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
symbol='org.apache.impala.TestUdf';
---- CATCH
AnalysisException: Type DATE is not supported for Java UDFs.
====
---- QUERY
# Timestamp values are not supported in Java UDFs yet and the implementation may have been
# written without Timestamps in mind. This test is a reminder that we'll need to review
# and test the implementation of HiveUdfCall (especially codegen) with Timestamp values
# when we add support for them.
create function identity(Timestamp) returns Timestamp
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
symbol='org.apache.impala.TestUdf';
---- CATCH
AnalysisException: Type TIMESTAMP is not supported for Java UDFs.
====
---- QUERY
create function var_args_func(int...) returns int
location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
symbol='org.apache.impala.TestUdf';
---- CATCH
CatalogException: Variable arguments not supported in Hive UDFs.
====