Files
impala/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
Tim Armstrong 381e719065 IMPALA-4266: Java udf returning string can give incorrect results
The memory management of string results was wrong: strings returned from
Exprs must live until the next time FreeLocalAllocations() is called.
Otherwise the buffer holding the string is freed or reused by the next
UDF call. The fix is to copy string values into a buffer with the
right lifetime.

Testing:
Added a regression test based on Bharath's example that reproduced the
bug reliably.

Change-Id: I705d271814cb1143f67d8a12f4fd87bab7a8e161
Reviewed-on: http://gerrit.cloudera.org:8080/4941
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Internal Jenkins
2016-11-08 02:47:11 +00:00

316 lines
6.3 KiB
Plaintext

====
---- QUERY
# hive_pi() takes no arguments; the expected value is pi, typed DOUBLE.
select
  udf_test.hive_pi()
---- RESULTS
3.141592653589793
---- TYPES
DOUBLE
====
---- QUERY
# hive_bin(100) is expected to yield the string '1100100' (100 written in base 2).
select
  udf_test.hive_bin(100)
---- RESULTS
'1100100'
---- TYPES
STRING
====
---- QUERY
# Same hive_pi() call, this time wrapped in min() over functional.alltypesagg.
select
  min(udf_test.hive_pi())
from
  functional.alltypesagg
---- RESULTS
3.141592653589793
---- TYPES
DOUBLE
====
---- QUERY
# Identity function on a boolean literal: expected output is the input, typed boolean.
select
  udf_test.identity(true);
---- TYPES
boolean
---- RESULTS
true
====
---- QUERY
# Identity function on a tinyint argument.
select
  udf_test.identity(
    cast(10 as tinyint));
---- TYPES
tinyint
---- RESULTS
10
====
---- QUERY
# Identity function on a smallint argument.
select
  udf_test.identity(
    cast(10 as smallint));
---- TYPES
smallint
---- RESULTS
10
====
---- QUERY
# Identity function on an int argument.
select
  udf_test.identity(
    cast(10 as int));
---- TYPES
int
---- RESULTS
10
====
---- QUERY
# Identity function on a bigint argument.
select
  udf_test.identity(
    cast(10 as bigint));
---- TYPES
bigint
---- RESULTS
10
====
---- QUERY
# Identity function on a float argument; the expected output is printed as 10.
select
  udf_test.identity(
    cast(10.0 as float));
---- TYPES
float
---- RESULTS
10
====
---- QUERY
# Identity function on a double argument; the expected output is printed as 10.
select
  udf_test.identity(
    cast(10.0 as double));
---- TYPES
double
---- RESULTS
10
====
---- QUERY
# IMPALA-1456. Each of the three "identity" calls below exercises a different type
# (BytesWritable, Text, and String). All three are expected to produce the same
# string, i.e. the multi-argument calls yield their arguments joined together.
select
  udf_test.identity("why hello there"),
  udf_test.identity("why", " hello there"),
  udf_test.identity("why", " hello", " there");
---- TYPES
string, string, string
---- RESULTS
'why hello there','why hello there','why hello there'
====
---- QUERY
# Identity function on an untyped NULL: the expected output is NULL, typed boolean.
select
  udf_test.identity(NULL);
---- TYPES
boolean
---- RESULTS
NULL
====
---- QUERY
# IMPALA-1134. Each of the three "identity" calls below exercises a different type
# (BytesWritable, Text, and String); these types are handled slightly differently.
# The expected lengths are 10 per ten-character argument passed in.
select
  length(udf_test.identity("0123456789")),
  length(udf_test.identity("0123456789", "0123456789")),
  length(udf_test.identity("0123456789", "0123456789", "0123456789"));
---- TYPES
int, int, int
---- RESULTS
10,20,30
====
---- QUERY
# IMPALA-1392: a Hive UDF that throws an exception should produce NULL rather than
# fail the query. Here the UDF is called once, without a table.
select
  udf_test.throws_exception();
---- TYPES
boolean
---- RESULTS
NULL
====
---- QUERY
# Same UDF evaluated against functional.alltypestiny: eight NULL rows are expected.
select
  udf_test.throws_exception()
from
  functional.alltypestiny;
---- TYPES
boolean
---- RESULTS
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
====
---- QUERY
# hive_add on two int arguments (1 and 2): expected output is 3, typed int.
select
  udf_test.hive_add(
    cast(1 as int),
    cast(2 as int));
---- TYPES
int
---- RESULTS
3
====
---- QUERY
# The output of one hive_add call is passed as an argument to another:
# hive_add(hive_add(1, 2), 2), expected output 5.
select
  udf_test.hive_add(
    udf_test.hive_add(cast(1 as int), cast(2 as int)),
    cast(2 as int));
---- TYPES
int
---- RESULTS
5
====
---- QUERY
# Two hive_add results are subtracted from each other and cast to int before being
# passed to an outer hive_add: hive_add(hive_add(1, 2) - hive_add(2, 1), 2),
# expected output 2.
select
  udf_test.hive_add(
    cast(
      udf_test.hive_add(cast(1 as int), cast(2 as int)) -
        udf_test.hive_add(cast(2 as int), cast(1 as int))
      as int),
    cast(2 as int));
---- TYPES
int
---- RESULTS
2
====
---- QUERY
# hive_add on two smallint arguments: expected output is 3, typed smallint.
select
  udf_test.hive_add(
    cast(1 as smallint),
    cast(2 as smallint));
---- TYPES
smallint
---- RESULTS
3
====
---- QUERY
# hive_add on two float arguments: expected output is 3.0, typed float.
select
  udf_test.hive_add(
    cast(1.0 as float),
    cast(2.0 as float));
---- TYPES
float
---- RESULTS
3.0
====
---- QUERY
# hive_add on two double arguments: expected output is 3.0, typed double.
select
  udf_test.hive_add(
    cast(1.0 as double),
    cast(2.0 as double));
---- TYPES
double
---- RESULTS
3.0
====
---- QUERY
# hive_add on two boolean arguments, cast(1 as boolean) and cast(0 as boolean):
# expected output is false, typed boolean.
select
  udf_test.hive_add(
    cast(1 as boolean),
    cast(0 as boolean));
---- TYPES
boolean
---- RESULTS
false
====
---- QUERY
# Check that the persistent Java UDFs in java_udfs_test are accessible. This case
# and the ones after it call identity() with one argument type each; boolean first.
select
  java_udfs_test.identity(true);
---- TYPES
boolean
---- RESULTS
true
====
---- QUERY
# Persistent identity() on a tinyint argument.
select
  java_udfs_test.identity(
    cast(10 as tinyint));
---- TYPES
tinyint
---- RESULTS
10
====
---- QUERY
# Persistent identity() on a smallint argument.
select
  java_udfs_test.identity(
    cast(10 as smallint));
---- TYPES
smallint
---- RESULTS
10
====
---- QUERY
# Persistent identity() on an int argument.
select
  java_udfs_test.identity(
    cast(10 as int));
---- TYPES
int
---- RESULTS
10
====
---- QUERY
# Persistent identity() on a bigint argument.
select
  java_udfs_test.identity(
    cast(10 as bigint));
---- TYPES
bigint
---- RESULTS
10
====
---- QUERY
# Persistent identity() on a float argument; the expected output is printed as 10.
select
  java_udfs_test.identity(
    cast(10.0 as float));
---- TYPES
float
---- RESULTS
10
====
---- QUERY
# Persistent identity() on a double argument; the expected output is printed as 10.
select
  java_udfs_test.identity(
    cast(10.0 as double));
---- TYPES
double
---- RESULTS
10
====
---- QUERY
# Persistent identity() with two string arguments: expected output is 'ab'.
select
  java_udfs_test.identity(
    "a",
    "b");
---- TYPES
string
---- RESULTS
'ab'
====
---- QUERY
# Persistent identity() with three string arguments: expected output is 'abc'.
select
  java_udfs_test.identity(
    "a",
    "b",
    "c");
---- TYPES
string
---- RESULTS
'abc'
====
---- QUERY
# IMPALA-3378: test many Java UDFs being opened and run concurrently.
# Sixteen max(int_col) aggregations over functional.alltypesagg are chained with
# right-nested "union all"s. Each branch filters on udf_test.identity() applied to
# one of the bool, tinyint, smallint, int, bigint, float, double and string columns:
# the first eight branches compare against 1 (or use bool_col directly), the last
# eight compare against 2 (or use "not bool_col").
# The expected output has one row per branch: a single 998 and fifteen 999s.
select * from
(select max(int_col) from functional.alltypesagg
where udf_test.identity(bool_col) union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(tinyint_col) > 1 union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(smallint_col) > 1 union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(int_col) > 1 union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(bigint_col) > 1 union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(float_col) > 1.0 union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(double_col) > 1.0 union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(string_col) > '1' union all
(select max(int_col) from functional.alltypesagg
where not udf_test.identity(bool_col) union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(tinyint_col) > 2 union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(smallint_col) > 2 union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(int_col) > 2 union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(bigint_col) > 2 union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(float_col) > 2.0 union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(double_col) > 2.0 union all
(select max(int_col) from functional.alltypesagg
where udf_test.identity(string_col) > '2'
)))))))))))))))) v
---- TYPES
INT
---- RESULTS
998
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
====
---- QUERY
# Setup for the IMPALA-4266 regression test below: remove any leftover copy of the
# input table so that the following "create table" cannot collide with it.
drop table if exists replace_string_input
====
---- QUERY
# Seven single-string rows; 'scone' and 'stuff' each appear twice, so the input
# contains duplicates for the "select distinct" in the next test case to collapse.
create table replace_string_input as
values('toast'), ('scone'), ('stuff'), ('sssss'), ('yes'), ('scone'), ('stuff');
====
---- QUERY
# Regression test for IMPALA-4266: memory management bugs with output strings from
# Java UDFs, exposed by using the UDF as a grouping key in an aggregation.
# The UDF replaces "s" with "ss" in the strings.
# NOTE(review): _c0 is presumably the auto-generated name of the column produced by
# the values clause in the preceding "create table" -- confirm.
# Five distinct outputs are expected from the seven input rows, in ascending order.
select distinct udf_test.replace_string(_c0) as es
from replace_string_input
order by 1;
---- TYPES
string
---- RESULTS
'sscone'
'ssssssssss'
'sstuff'
'toasst'
'yess'
====