mirror of
https://github.com/apache/impala.git
synced 2026-01-02 03:00:32 -05:00
This commit adds a new feature to persist hive/java udfs across catalog restarts. IMPALA-1748 already added this for non-java udfs by storing them in the parameters map of the Db object and reading them back at catalog startup. However, we follow a different approach for hive udfs by converting them to Hive's function format and adding them as hive functions to the metastore. This makes it possible to share udfs between hive and Impala, as the udfs added from one service are accessible to the other. This commit takes care of format conversions between hive and impala, and the user can just add a function once in either of the services. Background: Hive and impala treat udfs differently. Hive resolves the evaluate function in the udf class at runtime depending on the data types of the input arguments. So the user can add one function by name and can pass any arguments to it as long as there is a compatible evaluate function in the udf class. However, Impala takes the input types of the udf as a part of the function definition (that maps to only one evaluate function) and loads the function only for that set of input argument types. If we have multiple 'evaluate' methods, we need to add multiple functions, one for each of them. This commit adds new variants of CREATE | DROP FUNCTIONS to Impala which let the user create and drop hive/java udfs without input argument types or return types. Catalog takes care of loading/dropping the udf signatures corresponding to each "evaluate" method in the udf symbol class. The syntax is as follows, CREATE FUNCTION [IF NOT EXISTS] <function name> <function_opts> DROP FUNCTION [IF EXISTS] <function name> Examples: CREATE FUNCTION IF NOT EXISTS foo location '/path/to/jar' SYMBOL='TestUdf'; CREATE FUNCTION bar location '/path/to/jar' SYMBOL='TestUdf2'; DROP FUNCTION foo; DROP FUNCTION IF EXISTS bar; The older way of creating hive/java udfs with a specific signature is still supported; however, they are *not* persisted across restarts. 
So a restart of the catalog can wipe them out. Additionally, this commit also loads all the compatible java udfs added outside of Impala and they needn't be separately loaded. One thing to note here is that the functions added using the new CREATE FUNCTION can only be dropped using the new DROP FUNCTION syntax (without signature). The same rule applies for the java udfs added using the old CREATE FUNCTION syntax (with signature). Change-Id: If31ed3d5ac4192e3bc2d57610a9a0bbe1f62b42d Reviewed-on: http://gerrit.cloudera.org:8080/2250 Reviewed-by: Bharath Vissapragada <bharathv@cloudera.com> Tested-by: Internal Jenkins
406 lines
13 KiB
Plaintext
406 lines
13 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Drop the dummy udfs this test uses.
|
|
drop function if exists default.fn();
|
|
drop function if exists function_ddl_test.fn();
|
|
drop function if exists function_ddl_test.fn(int);
|
|
drop function if exists function_ddl_test.fn(int, string);
|
|
drop function if exists function_ddl_test.fn(string, int);
|
|
drop function if exists function_ddl_test.fn2(int);
|
|
drop function if exists function_ddl_test.fn2(int, string);
|
|
drop function if exists function_ddl_test.fn_var_arg(int...);
|
|
drop function if exists function_ddl_test.agg_fn(int);
|
|
drop function if exists function_ddl_test.agg_fn(int, string);
|
|
====
|
|
---- QUERY
|
|
# Verify all the test functions are removed
|
|
show functions in function_ddl_test
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Verify all the test functions are removed
|
|
show aggregate functions in function_ddl_test
|
|
---- RESULTS
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Add them and test function overloading and scoping.
|
|
create function default.fn() RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn'
|
|
====
|
|
---- QUERY
|
|
create function function_ddl_test.fn() RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn'
|
|
====
|
|
---- QUERY
|
|
create function function_ddl_test.fn(int) RETURNS double
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn'
|
|
====
|
|
---- QUERY
|
|
create function function_ddl_test.fn(int, string) RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn'
|
|
====
|
|
---- QUERY
|
|
create function function_ddl_test.fn(string, int) RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn'
|
|
====
|
|
---- QUERY
|
|
create function function_ddl_test.fn2(int) RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn2'
|
|
====
|
|
---- QUERY
|
|
create function function_ddl_test.fn2(int, string) RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='Fn2'
|
|
====
|
|
---- QUERY
|
|
create function function_ddl_test.fn_var_arg(int...) RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' SYMBOL='VarSum'
|
|
====
|
|
---- QUERY
|
|
# Only UPDATE_FN is specified here. The SHOW CREATE AGGREGATE FUNCTION output
# later in this file also lists INIT_FN, MERGE_FN and FINALIZE_FN symbols for
# this function (presumably resolved from the library by Impala -- confirm).
create aggregate function function_ddl_test.agg_fn(int) RETURNS bigint
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libudasample.so' UPDATE_FN='CountUpdate'
|
|
====
|
|
---- QUERY
|
|
create aggregate function function_ddl_test.agg_fn(int, string) RETURNS int
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/libTestUdas.so' UPDATE_FN='TwoArgUpdate'
|
|
====
|
|
---- QUERY
|
|
show functions in default
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn()','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show functions in function_ddl_test
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn_var_arg(INT...)','NATIVE','true'
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'INT','fn2(INT, STRING)','NATIVE','true'
|
|
'INT','fn()','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show aggregate functions in function_ddl_test
|
|
---- RESULTS
|
|
'BIGINT','agg_fn(INT)','NATIVE','true'
|
|
'INT','agg_fn(INT, STRING)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Check that none of the functions show up as analytic functions.
|
|
show analytic functions in function_ddl_test
|
|
---- RESULTS
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show create function default.fn
|
|
---- RESULTS: MULTI_LINE
|
|
['CREATE FUNCTION default.fn()
|
|
RETURNS INT
|
|
LOCATION '$NAMENODE/test-warehouse/libTestUdfs.so'
|
|
SYMBOL='_Z2FnPN10impala_udf15FunctionContextE'
|
|
']
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show create function function_ddl_test.fn_var_arg
|
|
---- RESULTS: MULTI_LINE
|
|
['CREATE FUNCTION function_ddl_test.fn_var_arg(INT...)
|
|
RETURNS INT
|
|
LOCATION '$NAMENODE/test-warehouse/libTestUdfs.so'
|
|
SYMBOL='_Z6VarSumPN10impala_udf15FunctionContextEiPKNS_6IntValE'
|
|
']
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show create aggregate function function_ddl_test.agg_fn
|
|
---- RESULTS: MULTI_LINE
|
|
['CREATE AGGREGATE FUNCTION function_ddl_test.agg_fn(INT)
|
|
RETURNS BIGINT
|
|
LOCATION '$NAMENODE/test-warehouse/libudasample.so'
|
|
UPDATE_FN='_Z11CountUpdatePN10impala_udf15FunctionContextERKNS_6IntValEPNS_9BigIntValE'
|
|
INIT_FN='_Z9CountInitPN10impala_udf15FunctionContextEPNS_9BigIntValE'
|
|
MERGE_FN='_Z10CountMergePN10impala_udf15FunctionContextERKNS_9BigIntValEPS2_'
|
|
FINALIZE_FN='_Z13CountFinalizePN10impala_udf15FunctionContextERKNS_9BigIntValE'
|
|
CREATE AGGREGATE FUNCTION function_ddl_test.agg_fn(INT, STRING)
|
|
RETURNS INT
|
|
LOCATION '$NAMENODE/test-warehouse/libTestUdas.so'
|
|
UPDATE_FN='_Z12TwoArgUpdatePN10impala_udf15FunctionContextERKNS_6IntValERKNS_9StringValEPS2_'
|
|
INIT_FN='_Z10TwoArgInitPN10impala_udf15FunctionContextEPNS_6IntValE'
|
|
MERGE_FN='_Z11TwoArgMergePN10impala_udf15FunctionContextERKNS_6IntValEPS2_'
|
|
']
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show create function _impala_builtins.sin
|
|
---- RESULTS: MULTI_LINE
|
|
['CREATE FUNCTION _impala_builtins.sin(DOUBLE)
|
|
RETURNS DOUBLE
|
|
LOCATION 'null'
|
|
SYMBOL='_ZN6impala13MathFunctions3SinEPN10impala_udf15FunctionContextERKNS1_9DoubleValE'
|
|
']
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show create aggregate function _impala_builtins.avg
|
|
---- RESULTS: MULTI_LINE
|
|
['CREATE AGGREGATE FUNCTION _impala_builtins.avg(BIGINT)
|
|
RETURNS DOUBLE
|
|
INTERMEDIATE STRING
|
|
LOCATION 'null'
|
|
UPDATE_FN='_ZN6impala18AggregateFunctions9AvgUpdateIN10impala_udf9BigIntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE'
|
|
INIT_FN='_ZN6impala18AggregateFunctions7AvgInitEPN10impala_udf15FunctionContextEPNS1_9StringValE'
|
|
MERGE_FN='_ZN6impala18AggregateFunctions8AvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_'
|
|
SERIALIZE_FN='_ZN6impala18AggregateFunctions28StringValSerializeOrFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
|
|
FINALIZE_FN='_ZN6impala18AggregateFunctions11AvgFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
|
|
CREATE AGGREGATE FUNCTION _impala_builtins.avg(DECIMAL(*,*))
|
|
RETURNS DECIMAL(*,*)
|
|
INTERMEDIATE STRING
|
|
LOCATION 'null'
|
|
UPDATE_FN='_ZN6impala18AggregateFunctions16DecimalAvgUpdateEPN10impala_udf15FunctionContextERKNS1_10DecimalValEPNS1_9StringValE'
|
|
INIT_FN='_ZN6impala18AggregateFunctions14DecimalAvgInitEPN10impala_udf15FunctionContextEPNS1_9StringValE'
|
|
MERGE_FN='_ZN6impala18AggregateFunctions15DecimalAvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_'
|
|
SERIALIZE_FN='_ZN6impala18AggregateFunctions28StringValSerializeOrFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
|
|
FINALIZE_FN='_ZN6impala18AggregateFunctions18DecimalAvgFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
|
|
CREATE AGGREGATE FUNCTION _impala_builtins.avg(DOUBLE)
|
|
RETURNS DOUBLE
|
|
INTERMEDIATE STRING
|
|
LOCATION 'null'
|
|
UPDATE_FN='_ZN6impala18AggregateFunctions9AvgUpdateIN10impala_udf9DoubleValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE'
|
|
INIT_FN='_ZN6impala18AggregateFunctions7AvgInitEPN10impala_udf15FunctionContextEPNS1_9StringValE'
|
|
MERGE_FN='_ZN6impala18AggregateFunctions8AvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_'
|
|
SERIALIZE_FN='_ZN6impala18AggregateFunctions28StringValSerializeOrFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
|
|
FINALIZE_FN='_ZN6impala18AggregateFunctions11AvgFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
|
|
CREATE AGGREGATE FUNCTION _impala_builtins.avg(TIMESTAMP)
|
|
RETURNS TIMESTAMP
|
|
INTERMEDIATE STRING
|
|
LOCATION 'null'
|
|
UPDATE_FN='_ZN6impala18AggregateFunctions18TimestampAvgUpdateEPN10impala_udf15FunctionContextERKNS1_12TimestampValEPNS1_9StringValE'
|
|
INIT_FN='_ZN6impala18AggregateFunctions7AvgInitEPN10impala_udf15FunctionContextEPNS1_9StringValE'
|
|
MERGE_FN='_ZN6impala18AggregateFunctions8AvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_'
|
|
SERIALIZE_FN='_ZN6impala18AggregateFunctions28StringValSerializeOrFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
|
|
FINALIZE_FN='_ZN6impala18AggregateFunctions20TimestampAvgFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
|
|
']
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Drop a single overload by exact signature. The SHOW FUNCTIONS that follows
# expects fn2(INT) to still be listed and fn2(INT, STRING) to be gone.
drop function function_ddl_test.fn2(int, string)
|
|
====
|
|
---- QUERY
|
|
show functions
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn_var_arg(INT...)','NATIVE','true'
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'INT','fn()','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# This overload was already dropped by an earlier statement in this file, so
# IF EXISTS is what lets this succeed; the following SHOW FUNCTIONS expects
# the same rows as before.
drop function if exists function_ddl_test.fn2(int, string)
|
|
====
|
|
---- QUERY
|
|
show functions in function_ddl_test
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn_var_arg(INT...)','NATIVE','true'
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'INT','fn()','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show functions in default;
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn()','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
drop function default.fn()
|
|
====
|
|
---- QUERY
|
|
show functions in default;
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show functions in function_ddl_test;
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn_var_arg(INT...)','NATIVE','true'
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'INT','fn()','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Unqualified function name. default.fn() has already been dropped earlier in
# this file, and the next SHOW FUNCTIONS no longer lists fn(), so this is
# presumably resolved against the session's current database
# (function_ddl_test, set by the test harness -- TODO confirm).
drop function fn()
|
|
====
|
|
---- QUERY
|
|
show functions;
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn_var_arg(INT...)','NATIVE','true'
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
drop function fn_var_arg(INT...)
|
|
====
|
|
---- QUERY
|
|
show functions;
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Drops one overload of the aggregate function using plain DROP FUNCTION
# (no AGGREGATE keyword). The next SHOW AGGREGATE FUNCTIONS expects only
# agg_fn(INT, STRING) to remain.
drop function agg_fn(int)
|
|
====
|
|
---- QUERY
|
|
show aggregate functions
|
|
---- RESULTS
|
|
'INT','agg_fn(INT, STRING)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Call invalidate metadata and make sure the functions are still there
|
|
invalidate metadata
|
|
---- RESULTS
|
|
---- TYPES
|
|
====
|
|
---- QUERY
|
|
show functions
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS
|
|
'INT','fn2(INT)','NATIVE','true'
|
|
'DOUBLE','fn(INT)','NATIVE','true'
|
|
'INT','fn(INT, STRING)','NATIVE','true'
|
|
'INT','fn(STRING, INT)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show aggregate functions
|
|
---- RESULTS
|
|
'INT','agg_fn(INT, STRING)','NATIVE','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show analytic functions
|
|
---- RESULTS
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Negative test for showing builtin scalar functions. The result
|
|
# should not contain aggregate or analytic functions. Note that
|
|
# the result must be non-empty for the test to succeed.
|
|
show functions in _impala_builtins;
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
'DOUBLE','avg(BIGINT)','BUILTIN','true'
|
|
'BIGINT','rank()','BUILTIN','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Positive test for showing builtin scalar functions.
|
|
show functions in _impala_builtins;
|
|
---- LABELS
|
|
return type, signature, binary type, is persistent
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'STRING','upper(STRING)','BUILTIN','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Negative test for showing builtin aggregate functions. The result should
|
|
# not contain scalar or analytic functions.
|
|
show aggregate functions in _impala_builtins;
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
'BIGINT','rank()','BUILTIN','true'
|
|
'STRING','upper(STRING)','BUILTIN','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Positive test for showing builtin aggregate functions.
|
|
show aggregate functions in _impala_builtins;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'STRING','group_concat(STRING)','BUILTIN','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Negative test for showing builtin analytic functions. The result should
|
|
# not contain non-analytic aggregate functions or scalar functions.
|
|
show analytic functions in _impala_builtins;
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
'STRING','group_concat(STRING)','BUILTIN','true'
|
|
'STRING','upper(STRING)','BUILTIN','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Positive test for showing builtin analytic functions.
|
|
show analytic functions in _impala_builtins;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'BIGINT','rank()','BUILTIN','true'
|
|
---- TYPES
|
|
STRING, STRING, STRING, STRING
|
|
====
|