Files
impala/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
Bharath Vissapragada ef0dac661c IMPALA-2843: Persist hive udfs across catalog restarts
This commit adds a new feature to persist hive/java udfs across
catalog restarts. IMPALA-1748 already added this for non-java
udfs by storing them in the parameters map of the Db object and
reading them back at catalog startup. However, we follow a
different approach for hive udfs by converting them to Hive's
function format and adding them as hive functions to the metastore.
This makes it possible to share udfs between hive and Impala, as the
udfs added from one service are accessible to the other. This commit
takes care of format conversions between hive and impala, so the user
only needs to add a function once in either of the services.

Background: Hive and impala treat udfs differently. Hive resolves the
evaluate function in the udf class at runtime depending on the data
types of the input arguments. So a user can add one function by name and
can pass any arguments to it as long as there is a compatible evaluate
function in the udf class. However, Impala takes the input types of the
udf as a part of the function definition (that maps to only one evaluate
function) and loads the function only for that set of input argument
types. If we have multiple 'evaluate' methods, we need to add multiple
functions, one for each of them.

This commit adds new variants of CREATE | DROP FUNCTION to Impala which
let the user create and drop hive/java udfs without input argument
types or return types. The catalog takes care of loading/dropping the udf
signatures corresponding to each "evaluate" method in the udf symbol
class. The syntax is as follows:

CREATE FUNCTION [IF NOT EXISTS] <function name> <function_opts>
DROP FUNCTION [IF EXISTS] <function name>

Examples:

CREATE FUNCTION IF NOT EXISTS foo location '/path/to/jar' SYMBOL='TestUdf';
CREATE FUNCTION bar location '/path/to/jar' SYMBOL='TestUdf2';
DROP FUNCTION foo;
DROP FUNCTION IF EXISTS bar;

The older way of creating hive/java udfs with a specific signature is still
supported; however, such udfs are *not* persisted across restarts, so a restart
of the catalog can wipe them out. Additionally, this commit loads all the
compatible java udfs added outside of Impala, so they needn't be loaded
separately. One thing to note here is that the functions added using the new
CREATE FUNCTION syntax can only be dropped using the new DROP FUNCTION syntax
(without signature). The same rule applies to the java udfs added using the old
CREATE FUNCTION syntax (with signature): they can only be dropped using the
old DROP FUNCTION syntax (with signature).

Change-Id: If31ed3d5ac4192e3bc2d57610a9a0bbe1f62b42d
Reviewed-on: http://gerrit.cloudera.org:8080/2250
Reviewed-by: Bharath Vissapragada <bharathv@cloudera.com>
Tested-by: Internal Jenkins
2016-02-19 23:04:03 -08:00

237 lines
3.9 KiB
Plaintext

====
---- QUERY
# Zero-argument UDF call; the expected output is pi as a DOUBLE.
select udf_test.hive_pi()
---- RESULTS
3.141592653589793
---- TYPES
DOUBLE
====
---- QUERY
# Integer argument with a STRING result: 100 is expected back as its binary
# representation.
select udf_test.hive_bin(100)
---- RESULTS
'1100100'
---- TYPES
STRING
====
---- QUERY
# Same UDF evaluated in a query with a FROM clause; min() reduces the output to a
# single row, so the expected result is still one value.
select min(udf_test.hive_pi()) from functional.alltypesagg
---- RESULTS
3.141592653589793
---- TYPES
DOUBLE
====
---- QUERY
# Test identity functions. This test and the ones that follow each pass a single
# value of one type to udf_test.identity() and expect the same value back with the
# same type.
select udf_test.identity(true);
---- TYPES
boolean
---- RESULTS
true
====
---- QUERY
# TINYINT argument.
select udf_test.identity(cast(10 as tinyint));
---- TYPES
tinyint
---- RESULTS
10
====
---- QUERY
# SMALLINT argument.
select udf_test.identity(cast(10 as smallint));
---- TYPES
smallint
---- RESULTS
10
====
---- QUERY
# INT argument.
select udf_test.identity(cast(10 as int));
---- TYPES
int
---- RESULTS
10
====
---- QUERY
# BIGINT argument.
select udf_test.identity(cast(10 as bigint));
---- TYPES
bigint
---- RESULTS
10
====
---- QUERY
# FLOAT argument; the expected value is written without a fractional part.
select udf_test.identity(cast(10.0 as float));
---- TYPES
float
---- RESULTS
10
====
---- QUERY
# DOUBLE argument; the expected value is written without a fractional part.
select udf_test.identity(cast(10.0 as double));
---- TYPES
double
---- RESULTS
10
====
---- QUERY
# IMPALA-1456. Each "identity" call below tests a different type (BytesWritable, Text,
# and String).
# The one-, two- and three-argument calls are all expected to produce the same
# string, i.e. the multi-argument calls return their arguments concatenated.
select udf_test.identity("why hello there"),
udf_test.identity("why", " hello there"),
udf_test.identity("why", " hello", " there");
---- TYPES
string, string, string
---- RESULTS
'why hello there','why hello there','why hello there'
====
---- QUERY
# Untyped NULL argument: the expected result is NULL, reported with type BOOLEAN.
select udf_test.identity(NULL);
---- TYPES
boolean
---- RESULTS
NULL
====
---- QUERY
# IMPALA-1134. Each "identity" call below tests a different type (BytesWritable, Text,
# and String). The different types are handled slightly differently.
# The expected lengths 10, 20 and 30 correspond to one, two and three 10-character
# arguments respectively.
select length(udf_test.identity("0123456789")),
length(udf_test.identity("0123456789", "0123456789")),
length(udf_test.identity("0123456789", "0123456789", "0123456789"));
---- TYPES
int, int, int
---- RESULTS
10,20,30
====
---- QUERY
# IMPALA-1392: Hive UDFs that throw exceptions should return NULL
# Single call without a FROM clause: one NULL row of type BOOLEAN is expected.
select udf_test.throws_exception();
---- TYPES
boolean
---- RESULTS
NULL
====
---- QUERY
# Same UDF with a FROM clause: the expected output is eight rows, each NULL
# (presumably one per row of functional.alltypestiny -- confirm against the table).
select udf_test.throws_exception() from functional.alltypestiny;
---- TYPES
boolean
---- RESULTS
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
====
---- QUERY
# INT arguments: 1 + 2 is expected to give 3 with result type INT.
select udf_test.hive_add(cast(1 as int), cast(2 as int));
---- TYPES
int
---- RESULTS
3
====
---- QUERY
# Nested call: the result of the inner hive_add is the first argument of the outer
# one, so the expected value is (1 + 2) + 2 = 5.
select udf_test.hive_add(udf_test.hive_add(cast(1 as int), cast(2 as int)), cast(2 as int));
---- TYPES
int
---- RESULTS
5
====
---- QUERY
# UDF calls used inside an arithmetic expression: (1 + 2) - (2 + 1) = 0 is cast
# back to INT and passed to an outer hive_add together with 2, giving 2.
select udf_test.hive_add(cast(udf_test.hive_add(cast(1 as int), cast(2 as int)) - udf_test.hive_add(cast(2 as int), cast(1 as int)) as int), cast(2 as int));
---- TYPES
int
---- RESULTS
2
====
---- QUERY
# SMALLINT arguments; the expected result type is SMALLINT.
select udf_test.hive_add(cast(1 as smallint), cast(2 as smallint));
---- TYPES
smallint
---- RESULTS
3
====
---- QUERY
# FLOAT arguments; the expected result type is FLOAT.
select udf_test.hive_add(cast(1.0 as float), cast(2.0 as float));
---- TYPES
float
---- RESULTS
3.0
====
---- QUERY
# DOUBLE arguments; the expected result type is DOUBLE.
select udf_test.hive_add(cast(1.0 as double), cast(2.0 as double));
---- TYPES
double
---- RESULTS
3.0
====
---- QUERY
# BOOLEAN arguments (1 and 0 cast to boolean); the expected result is false.
# NOTE(review): presumably the boolean variant is a logical AND -- confirm against
# the UDF implementation.
select udf_test.hive_add(cast(1 as boolean), cast(0 as boolean));
---- TYPES
boolean
---- RESULTS
false
====
---- QUERY
# Testing whether all of the persistent Java udfs are accessible.
# This test and the ones that follow call identity() from the java_udfs_test
# database once per argument signature.
select java_udfs_test.identity(true);
---- TYPES
boolean
---- RESULTS
true
====
---- QUERY
# TINYINT argument.
select java_udfs_test.identity(cast(10 as tinyint));
---- TYPES
tinyint
---- RESULTS
10
====
---- QUERY
# SMALLINT argument.
select java_udfs_test.identity(cast(10 as smallint));
---- TYPES
smallint
---- RESULTS
10
====
---- QUERY
# INT argument.
select java_udfs_test.identity(cast(10 as int));
---- TYPES
int
---- RESULTS
10
====
---- QUERY
# BIGINT argument.
select java_udfs_test.identity(cast(10 as bigint));
---- TYPES
bigint
---- RESULTS
10
====
---- QUERY
# FLOAT argument; the expected value is written without a fractional part.
select java_udfs_test.identity(cast(10.0 as float));
---- TYPES
float
---- RESULTS
10
====
---- QUERY
# DOUBLE argument; the expected value is written without a fractional part.
select java_udfs_test.identity(cast(10.0 as double));
---- TYPES
double
---- RESULTS
10
====
---- QUERY
# Two STRING arguments; the expected result is their concatenation.
select java_udfs_test.identity("a", "b");
---- TYPES
string
---- RESULTS
'ab'
====
---- QUERY
# Three STRING arguments; the expected result is their concatenation.
select java_udfs_test.identity("a", "b", "c");
---- TYPES
string
---- RESULTS
'abc'
====