mirror of
https://github.com/apache/impala.git
synced 2026-01-17 12:00:29 -05:00
Before this patch, if an argument of a GenericUDF was NULL, Impala passed it as null instead of a DeferredObject. This was incorrect: a DeferredObject is expected, whose get() function returns null. See the Jira for more details and GenericUDF examples in Hive. TestGenericUdf's NULL handling was further broken in IMPALA-11549, which caused null pointer exceptions to be thrown when the UDF's result is NULL. This test bug was not detected because the Hive UDF tests were running with the default abort_java_udf_on_exception=false, which means that exceptions from Hive UDFs only led to warnings and a NULL return value, which was the expected result in all affected test queries. This patch fixes the behavior in HiveUdfExecutorGeneric and improves the FE/EE tests to catch NULL-handling issues. After this patch, most Hive UDF tests are run with abort_java_udf_on_exception=true to treat exceptions in UDFs as errors. The tests that check that NULL is returned when an exception is thrown while abort_java_udf_on_exception is false are moved to new .test files. TestGenericUdf is also fixed (and simplified) to handle NULL return values correctly. Change-Id: I53238612f4037572abb6d2cc913dd74ee830a9c9 Reviewed-on: http://gerrit.cloudera.org:8080/19499 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
352 lines
8.1 KiB
Plaintext
352 lines
8.1 KiB
Plaintext
====
---- QUERY
# Zero-argument UDF call; the expected value is pi as a DOUBLE.
select hive_pi()
---- RESULTS
3.141592653589793
---- TYPES
DOUBLE
====
---- QUERY
# Integer argument, STRING result: 100 is expected back as its binary digits.
select hive_bin(100)
---- RESULTS
'1100100'
---- TYPES
STRING
====
---- QUERY
# Same zero-argument UDF, this time evaluated inside an aggregate over a table scan.
select min(hive_pi()) from functional.alltypesagg
---- RESULTS
3.141592653589793
---- TYPES
DOUBLE
====
---- QUERY
# Test identity functions
# Every case in this group passes one non-NULL value and one typed NULL of the same
# type; both are expected to come back unchanged.
select identity(true), identity(cast(NULL as boolean));
---- TYPES
boolean, boolean
---- RESULTS
true,NULL
====
---- QUERY
# TINYINT round trip.
select identity(cast(10 as tinyint)), identity(cast(NULL as tinyint));
---- TYPES
tinyint, tinyint
---- RESULTS
10,NULL
====
---- QUERY
# SMALLINT round trip.
select identity(cast(10 as smallint)), identity(cast(NULL as smallint));
---- TYPES
smallint, smallint
---- RESULTS
10,NULL
====
---- QUERY
# INT round trip.
select identity(cast(10 as int)), identity(cast(NULL as int));
---- TYPES
int, int
---- RESULTS
10,NULL
====
---- QUERY
# BIGINT round trip.
select identity(cast(10 as bigint)), identity(cast(NULL as bigint));
---- TYPES
bigint, bigint
---- RESULTS
10,NULL
====
---- QUERY
# FLOAT round trip.
# NOTE(review): the expected value is written as 10 rather than 10.0; presumably the
# verifier compares floating-point columns numerically - confirm.
select identity(cast(10.0 as float)), identity(cast(NULL as float));
---- TYPES
float, float
---- RESULTS
10,NULL
====
---- QUERY
# DOUBLE round trip.
select identity(cast(10.0 as double)), identity(cast(NULL as double));
---- TYPES
double, double
---- RESULTS
10,NULL
====
---- QUERY
# IMPALA-1456. Each "identity" call below tests a different type (BytesWritable, Text,
# and String).
# Per the expected rows, the multi-argument calls return their arguments concatenated,
# and NULL inputs (in every argument position) produce a NULL result.
# NOTE(review): NULL strings appear quoted as 'NULL' in the expected rows; presumably
# that is how the result formatter renders NULL in STRING columns - confirm.
select identity("why hello there"),
    identity("why", " hello there"),
    identity("why", " hello", " there"),
    identity(cast(NULL as string)),
    identity(cast(NULL as string), cast(NULL as string)),
    identity(cast(NULL as string), cast(NULL as string), cast(NULL as string));
---- TYPES
string, string, string, string, string, string
---- RESULTS
'why hello there','why hello there','why hello there','NULL','NULL','NULL'
====
---- QUERY
# BINARY round trip: one non-NULL value and one typed NULL.
select identity(cast("a" as binary)), identity(cast(NULL as binary));
---- TYPES
binary, binary
---- RESULTS
'a','NULL'
====
---- QUERY
# IMPALA-1134. Each "identity" call below tests a different type (BytesWritable, Text,
# and String). The different types are handled slightly differently.
# length() of each result checks that the full 10/20/30-character string comes back.
select length(identity("0123456789")),
    length(identity("0123456789", "0123456789")),
    length(identity("0123456789", "0123456789", "0123456789"));
---- TYPES
int, int, int
---- RESULTS
10,20,30
====
---- QUERY
# The UDF call is expected to fail the query with an error containing "Test exception".
# NOTE(review): presumably this relies on the suite running with
# abort_java_udf_on_exception=true - confirm against the test runner.
select throws_exception();
---- CATCH
Test exception
====
---- QUERY
# Same expectation when the UDF is evaluated against rows of a table.
select throws_exception() from functional.alltypestiny;
---- CATCH
Test exception
====
---- QUERY
# hive_add is exercised once per argument type in this group; this case covers INT.
select hive_add(cast(1 as int), cast(2 as int));
---- TYPES
int
---- RESULTS
3
====
---- QUERY
# Nested call: the inner hive_add result is an argument of the outer call.
select hive_add(hive_add(cast(1 as int), cast(2 as int)), cast(2 as int));
---- TYPES
int
---- RESULTS
5
====
---- QUERY
# Two UDF results are subtracted (3 - 3), cast back to INT, and fed into another call.
select hive_add(cast(hive_add(cast(1 as int), cast(2 as int)) - hive_add(cast(2 as int), cast(1 as int)) as int), cast(2 as int));
---- TYPES
int
---- RESULTS
2
====
---- QUERY
# SMALLINT arguments.
select hive_add(cast(1 as smallint), cast(2 as smallint));
---- TYPES
smallint
---- RESULTS
3
====
---- QUERY
# FLOAT arguments.
select hive_add(cast(1.0 as float), cast(2.0 as float));
---- TYPES
float
---- RESULTS
3.0
====
---- QUERY
# DOUBLE arguments.
select hive_add(cast(1.0 as double), cast(2.0 as double));
---- TYPES
double
---- RESULTS
3.0
====
---- QUERY
# BOOLEAN arguments: (true, false) is expected to yield false.
select hive_add(cast(1 as boolean), cast(0 as boolean));
---- TYPES
boolean
---- RESULTS
false
====
---- QUERY
# Testing whether all of the persistent Java UDFs are accessible.
# The cases in this group call identity_anytype once per argument type.
select identity_anytype(true);
---- TYPES
boolean
---- RESULTS
true
====
---- QUERY
# TINYINT argument.
select identity_anytype(cast(10 as tinyint));
---- TYPES
tinyint
---- RESULTS
10
====
---- QUERY
# SMALLINT argument.
select identity_anytype(cast(10 as smallint));
---- TYPES
smallint
---- RESULTS
10
====
---- QUERY
# INT argument.
select identity_anytype(cast(10 as int));
---- TYPES
int
---- RESULTS
10
====
---- QUERY
# BIGINT argument.
select identity_anytype(cast(10 as bigint));
---- TYPES
bigint
---- RESULTS
10
====
---- QUERY
# FLOAT argument.
select identity_anytype(cast(10.0 as float));
---- TYPES
float
---- RESULTS
10
====
---- QUERY
# DOUBLE argument.
select identity_anytype(cast(10.0 as double));
---- TYPES
double
---- RESULTS
10
====
---- QUERY
# Two STRING arguments: the expected result is their concatenation.
select identity_anytype("a", "b");
---- TYPES
string
---- RESULTS
'ab'
====
---- QUERY
# Three STRING arguments: the expected result is their concatenation.
select identity_anytype("a", "b", "c");
---- TYPES
string
---- RESULTS
'abc'
====
---- QUERY
# BINARY is only supported when the function is created
# specifically with BINARY arguments / return type (IMPALA-11340).
select identity_anytype(cast("a" as binary));
---- CATCH
AnalysisException: No matching function with signature
====
---- QUERY
# IMPALA-3378: test many Java UDFs being opened and run concurrently
# The query nests 16 UNION ALL branches. Each branch filters functional.alltypesagg
# through identity() on a different column / threshold and contributes one
# max(int_col) row, hence the 16 expected rows.
# NOTE(review): there is no ORDER BY; presumably the verifier compares the rows
# order-insensitively - confirm.
select * from
(select max(int_col) from functional.alltypesagg
  where identity(bool_col) union all
(select max(int_col) from functional.alltypesagg
  where identity(tinyint_col) > 1 union all
(select max(int_col) from functional.alltypesagg
  where identity(smallint_col) > 1 union all
(select max(int_col) from functional.alltypesagg
  where identity(int_col) > 1 union all
(select max(int_col) from functional.alltypesagg
  where identity(bigint_col) > 1 union all
(select max(int_col) from functional.alltypesagg
  where identity(float_col) > 1.0 union all
(select max(int_col) from functional.alltypesagg
  where identity(double_col) > 1.0 union all
(select max(int_col) from functional.alltypesagg
  where identity(string_col) > '1' union all
(select max(int_col) from functional.alltypesagg
  where not identity(bool_col) union all
(select max(int_col) from functional.alltypesagg
  where identity(tinyint_col) > 2 union all
(select max(int_col) from functional.alltypesagg
  where identity(smallint_col) > 2 union all
(select max(int_col) from functional.alltypesagg
  where identity(int_col) > 2 union all
(select max(int_col) from functional.alltypesagg
  where identity(bigint_col) > 2 union all
(select max(int_col) from functional.alltypesagg
  where identity(float_col) > 2.0 union all
(select max(int_col) from functional.alltypesagg
  where identity(double_col) > 2.0 union all
(select max(int_col) from functional.alltypesagg
  where identity(string_col) > '2'
)))))))))))))))) v
---- TYPES
INT
---- RESULTS
998
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
====
---- QUERY
# Remove any leftover copy of the input table so the CREATE below cannot collide with it.
drop table if exists replace_string_input
====
---- QUERY
# Input rows for the IMPALA-4266 regression test below. 'scone' and 'stuff' each appear
# twice, so the DISTINCT in that test collapses seven input rows into five results.
create table replace_string_input as
values('toast'), ('scone'), ('stuff'), ('sssss'), ('yes'), ('scone'), ('stuff');
====
---- QUERY
# Regression test for IMPALA-4266: memory management bugs with output strings from
# Java UDFs, exposed by using the UDF as a grouping key in an aggregation.
# The UDF replaces "s" with "ss" in the strings.
# NOTE(review): _c0 is presumably the default name given to the unnamed VALUES column
# of replace_string_input - confirm.
select distinct replace_string(_c0) as es
from replace_string_input
order by 1;
---- TYPES
string
---- RESULTS
'sscone'
'ssssssssss'
'sstuff'
'toasst'
'yess'
====
---- QUERY
# Regression test for IMPALA-8016; this UDF loads another class in the same jar.
# The expected result is the fixed string 'Hello' for the "placeholder" argument.
select import_nearby_classes("placeholder");
---- TYPES
string
---- RESULTS
'Hello'
====
---- QUERY
# In the interpreted code path of HiveUdfCall, we use ScalarExprEvaluator::GetValue which
# evaluates the child expression and returns a nullptr if it is NULL. But in the case of
# TYPE_DATE it also returns nullptr if the date is invalid, so the interpreted path would
# handle invalid dates as nulls. Java UDFs involving Date are not allowed yet, this test
# is a reminder that we'll have to handle this case in codegen when we add support for Date.
# The CREATE FUNCTION itself is expected to be rejected with the error below.
create function identity(Date) returns Date
    location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
    symbol='org.apache.impala.TestUdf';
---- CATCH
AnalysisException: Type DATE is not supported for Java UDFs.
====
---- QUERY
# Timestamp values are not supported in Java UDFs yet and the implementation may have been
# written without Timestamps in mind. This test is a reminder that we'll need to review
# and test the implementation of HiveUdfCall (especially codegen) with Timestamp values
# when we add support for them.
# The CREATE FUNCTION itself is expected to be rejected with the error below.
create function identity(Timestamp) returns Timestamp
    location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
    symbol='org.apache.impala.TestUdf';
---- CATCH
AnalysisException: Type TIMESTAMP is not supported for Java UDFs.
====
---- QUERY
# A variable-argument signature (int...) is expected to be rejected at CREATE FUNCTION
# time for a Hive UDF, with the CatalogException below.
create function var_args_func(int...) returns int
    location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
    symbol='org.apache.impala.TestUdf';
---- CATCH
CatalogException: Variable arguments not supported in Hive UDFs.
====