mirror of
https://github.com/apache/impala.git
synced 2025-12-23 21:08:39 -05:00
Convert string functions to UDF interface
This also switches to using the re2 library for regular expression functions instead of boost.

Change-Id: I4c3ae72ff2f7cbd5b1a2be719275f1b2e25f8ab2
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/3412
Reviewed-by: Skye Wanderman-Milne <skye@cloudera.com>
Tested-by: Skye Wanderman-Milne <skye@cloudera.com>
This commit is contained in:
committed by
Nong Li
parent
7a0cc27fd1
commit
f062a22997
@@ -61,6 +61,7 @@ set(IR_DEPENDENT_FILES
|
||||
../exprs/expr-ir.cc
|
||||
../exprs/math-functions.cc
|
||||
../exprs/operators.cc
|
||||
../exprs/string-functions.cc
|
||||
../exprs/udf-builtins.cc
|
||||
../exprs/like-predicate.cc
|
||||
../runtime/string-value-ir.cc
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#include "exprs/expr-ir.cc"
|
||||
#include "exprs/math-functions.cc"
|
||||
#include "exprs/operators.cc"
|
||||
#include "exprs/string-functions.cc"
|
||||
#include "exprs/udf-builtins.cc"
|
||||
#include "runtime/string-value-ir.cc"
|
||||
#include "udf/udf.cc"
|
||||
|
||||
@@ -157,6 +157,10 @@ class AnyValUtil {
|
||||
}
|
||||
}
|
||||
|
||||
static std::string ToString(const StringVal& v) {
|
||||
return std::string(reinterpret_cast<char*>(v.ptr), v.len);
|
||||
}
|
||||
|
||||
static StringVal FromString(FunctionContext* ctx, const std::string& s) {
|
||||
return FromBuffer(ctx, s.c_str(), s.size());
|
||||
}
|
||||
|
||||
@@ -1438,6 +1438,7 @@ TEST_F(ExprTest, StringFunctions) {
|
||||
TestStringValue("repeat('', cast(6 as bigint))", "");
|
||||
TestStringValue("repeat('ab', 0)", "");
|
||||
TestStringValue("repeat('ab', -1)", "");
|
||||
TestStringValue("repeat('ab', -100)", "");
|
||||
TestStringValue("repeat('ab', 1)", "ab");
|
||||
TestStringValue("repeat('ab', cast(6 as bigint))", "abababababab");
|
||||
TestIsNull("repeat(NULL, 6)", TYPE_STRING);
|
||||
@@ -1581,8 +1582,8 @@ TEST_F(ExprTest, StringRegexpFunctions) {
|
||||
// Test finding of leftmost maximal match.
|
||||
TestStringValue("regexp_extract('I001=-200,I003=-210,I007=0', 'I001=-?[0-9]+', 0)",
|
||||
"I001=-200");
|
||||
// Invalid regex patter, unmatched parenthesis.
|
||||
TestIsNull("regexp_extract('abxcy1234a', '(/.', 0)", TYPE_STRING);
|
||||
// Invalid regex pattern, unmatched parenthesis.
|
||||
TestError("regexp_extract('abxcy1234a', '(/.', 0)");
|
||||
// NULL arguments.
|
||||
TestIsNull("regexp_extract(NULL, 'a.x', 2)", TYPE_STRING);
|
||||
TestIsNull("regexp_extract('abxcy1234a', NULL, 2)", TYPE_STRING);
|
||||
@@ -1602,8 +1603,8 @@ TEST_F(ExprTest, StringRegexpFunctions) {
|
||||
TestStringValue("regexp_replace('', 'err', '')", "");
|
||||
TestStringValue("regexp_replace('', '', 'abc')", "abc");
|
||||
TestStringValue("regexp_replace('axcaycazc', '', 'r')", "rarxrcraryrcrarzrcr");
|
||||
// Invalid regex patter, unmatched parenthesis.
|
||||
TestIsNull("regexp_replace('abxcy1234a', '(/.', 'x')", TYPE_STRING);
|
||||
// Invalid regex pattern, unmatched parenthesis.
|
||||
TestError("regexp_replace('abxcy1234a', '(/.', 'x')");
|
||||
// NULL arguments.
|
||||
TestIsNull("regexp_replace(NULL, 'a.*', 'abcde')", TYPE_STRING);
|
||||
TestIsNull("regexp_replace('axcaycazc', NULL, 'abcde')", TYPE_STRING);
|
||||
@@ -1810,30 +1811,30 @@ TEST_F(ExprTest, StringParseUrlFunction) {
|
||||
|
||||
// Invalid part parameters.
|
||||
// All characters in the part parameter must be uppercase (consistent with Hive).
|
||||
TestIsNull("parse_url('http://example.com', 'authority')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'Authority')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'AUTHORITYXYZ')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'file')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'File')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'FILEXYZ')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'host')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'Host')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'HOSTXYZ')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'path')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'Path')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'PATHXYZ')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'protocol')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'Protocol')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'PROTOCOLXYZ')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'query')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'Query')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'QUERYXYZ')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'ref')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'Ref')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'REFXYZ')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'userinfo')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'Userinfo')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com', 'USERINFOXYZ')", TYPE_STRING);
|
||||
TestError("parse_url('http://example.com', 'authority')");
|
||||
TestError("parse_url('http://example.com', 'Authority')");
|
||||
TestError("parse_url('http://example.com', 'AUTHORITYXYZ')");
|
||||
TestError("parse_url('http://example.com', 'file')");
|
||||
TestError("parse_url('http://example.com', 'File')");
|
||||
TestError("parse_url('http://example.com', 'FILEXYZ')");
|
||||
TestError("parse_url('http://example.com', 'host')");
|
||||
TestError("parse_url('http://example.com', 'Host')");
|
||||
TestError("parse_url('http://example.com', 'HOSTXYZ')");
|
||||
TestError("parse_url('http://example.com', 'path')");
|
||||
TestError("parse_url('http://example.com', 'Path')");
|
||||
TestError("parse_url('http://example.com', 'PATHXYZ')");
|
||||
TestError("parse_url('http://example.com', 'protocol')");
|
||||
TestError("parse_url('http://example.com', 'Protocol')");
|
||||
TestError("parse_url('http://example.com', 'PROTOCOLXYZ')");
|
||||
TestError("parse_url('http://example.com', 'query')");
|
||||
TestError("parse_url('http://example.com', 'Query')");
|
||||
TestError("parse_url('http://example.com', 'QUERYXYZ')");
|
||||
TestError("parse_url('http://example.com', 'ref')");
|
||||
TestError("parse_url('http://example.com', 'Ref')");
|
||||
TestError("parse_url('http://example.com', 'REFXYZ')");
|
||||
TestError("parse_url('http://example.com', 'userinfo')");
|
||||
TestError("parse_url('http://example.com', 'Userinfo')");
|
||||
TestError("parse_url('http://example.com', 'USERINFOXYZ')");
|
||||
|
||||
// NULL arguments.
|
||||
TestIsNull("parse_url(NULL, 'AUTHORITY')", TYPE_STRING);
|
||||
@@ -1888,8 +1889,8 @@ TEST_F(ExprTest, StringParseUrlFunction) {
|
||||
"index.html?test=true&name=networking&op=true', 'PROTOCOL', 'name')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com:80/docs/books/tutorial/"
|
||||
"index.html?test=true&name=networking&op=true', 'REF', 'name')", TYPE_STRING);
|
||||
TestIsNull("parse_url('http://example.com:80/docs/books/tutorial/"
|
||||
"index.html?test=true&name=networking&op=true', 'XYZ', 'name')", TYPE_STRING);
|
||||
TestError("parse_url('http://example.com:80/docs/books/tutorial/"
|
||||
"index.html?test=true&name=networking&op=true', 'XYZ', 'name')");
|
||||
}
|
||||
|
||||
TEST_F(ExprTest, UtilityFunctions) {
|
||||
|
||||
@@ -1022,7 +1022,7 @@ void Expr::InitBuiltinsDummy() {
|
||||
DecimalOperators::CastToDecimalVal(NULL, DecimalVal::null());
|
||||
MathFunctions::Pi(NULL);
|
||||
Operators::Add_IntVal_IntVal(NULL, IntVal::null(), IntVal::null());
|
||||
StringFunctions::Length(NULL, NULL);
|
||||
StringFunctions::Length(NULL, StringVal::null());
|
||||
TimestampFunctions::Year(NULL, TimestampVal::null());
|
||||
UdfBuiltins::Pi(NULL);
|
||||
UtilityFunctions::Pid(NULL, NULL);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -19,6 +19,8 @@
|
||||
#include "runtime/string-value.h"
|
||||
#include "runtime/string-search.h"
|
||||
|
||||
using namespace impala_udf;
|
||||
|
||||
namespace impala {
|
||||
|
||||
class Expr;
|
||||
@@ -27,33 +29,51 @@ class TupleRow;
|
||||
|
||||
class StringFunctions {
|
||||
public:
|
||||
template <class T> static void* Substring(Expr* e, TupleRow* row);
|
||||
template <class T> static void* Left(Expr* e, TupleRow* row);
|
||||
template <class T> static void* Right(Expr* e, TupleRow* row);
|
||||
template <class T> static void* Space(Expr* e, TupleRow* row);
|
||||
template <class T> static void* Repeat(Expr* e, TupleRow* row);
|
||||
template <class T> static void* Lpad(Expr* e, TupleRow* row);
|
||||
template <class T> static void* Rpad(Expr* e, TupleRow* row);
|
||||
static void* Length(Expr* e, TupleRow* row);
|
||||
static void* Lower(Expr* e, TupleRow* row);
|
||||
static void* Upper(Expr* e, TupleRow* row);
|
||||
static void* InitCap(Expr* e, TupleRow* row);
|
||||
static void* Reverse(Expr* e, TupleRow* row);
|
||||
static void* Translate(Expr* e, TupleRow* row);
|
||||
static void* Trim(Expr* e, TupleRow* row);
|
||||
static void* Ltrim(Expr* e, TupleRow* row);
|
||||
static void* Rtrim(Expr* e, TupleRow* row);
|
||||
static void* Ascii(Expr* e, TupleRow* row);
|
||||
static void* Instr(Expr* e, TupleRow* row);
|
||||
static void* Locate(Expr* e, TupleRow* row);
|
||||
template <class T> static void* LocatePos(Expr* e, TupleRow* row);
|
||||
template <class T> static void* RegexpExtract(Expr* e, TupleRow* row);
|
||||
static void* RegexpReplace(Expr* e, TupleRow* row);
|
||||
static void* Concat(Expr* e, TupleRow* row);
|
||||
static void* ConcatWs(Expr* e, TupleRow* row);
|
||||
static void* FindInSet(Expr* e, TupleRow* row);
|
||||
static void* ParseUrl(Expr* e, TupleRow* row);
|
||||
static void* ParseUrlKey(Expr* e, TupleRow* row);
|
||||
static StringVal Substring(FunctionContext*, const StringVal& str, const BigIntVal& pos,
|
||||
const BigIntVal& len);
|
||||
static StringVal Substring(FunctionContext*, const StringVal& str, const BigIntVal& pos);
|
||||
static StringVal Left(FunctionContext*, const StringVal& str, const BigIntVal& len);
|
||||
static StringVal Right(FunctionContext*, const StringVal& str, const BigIntVal& len);
|
||||
static StringVal Space(FunctionContext*, const BigIntVal& len);
|
||||
static StringVal Repeat(FunctionContext*, const StringVal& str, const BigIntVal& n);
|
||||
static StringVal Lpad(FunctionContext*, const StringVal& str, const BigIntVal& len,
|
||||
const StringVal& pad);
|
||||
static StringVal Rpad(FunctionContext*, const StringVal& str, const BigIntVal&,
|
||||
const StringVal& pad);
|
||||
static IntVal Length(FunctionContext*, const StringVal& str);
|
||||
static StringVal Lower(FunctionContext*, const StringVal& str);
|
||||
static StringVal Upper(FunctionContext*, const StringVal& str);
|
||||
static StringVal InitCap(FunctionContext*, const StringVal& str);
|
||||
static StringVal Reverse(FunctionContext*, const StringVal& str);
|
||||
static StringVal Translate(FunctionContext*, const StringVal& str, const StringVal& src,
|
||||
const StringVal& dst);
|
||||
static StringVal Trim(FunctionContext*, const StringVal& str);
|
||||
static StringVal Ltrim(FunctionContext*, const StringVal& str);
|
||||
static StringVal Rtrim(FunctionContext*, const StringVal& str);
|
||||
static IntVal Ascii(FunctionContext*, const StringVal& str);
|
||||
static IntVal Instr(FunctionContext*, const StringVal& str, const StringVal& substr);
|
||||
static IntVal Locate(FunctionContext*, const StringVal& substr, const StringVal& str);
|
||||
static IntVal LocatePos(FunctionContext*, const StringVal& substr, const StringVal& str,
|
||||
const BigIntVal& start_pos);
|
||||
|
||||
static void RegexpPrepare(FunctionContext*, FunctionContext::FunctionStateScope);
|
||||
static void RegexpClose(FunctionContext*, FunctionContext::FunctionStateScope);
|
||||
static StringVal RegexpExtract(FunctionContext*, const StringVal& str,
|
||||
const StringVal& pattern, const BigIntVal& index);
|
||||
static StringVal RegexpReplace(FunctionContext*, const StringVal& str,
|
||||
const StringVal& pattern, const StringVal& replace);
|
||||
|
||||
static StringVal Concat(FunctionContext*, int num_children, const StringVal* strs);
|
||||
static StringVal ConcatWs(FunctionContext*, const StringVal& sep, int num_children,
|
||||
const StringVal* strs);
|
||||
static IntVal FindInSet(FunctionContext*, const StringVal& str,
|
||||
const StringVal& str_set);
|
||||
|
||||
static void ParseUrlPrepare(FunctionContext*, FunctionContext::FunctionStateScope);
|
||||
static StringVal ParseUrl(FunctionContext*, const StringVal& url, const StringVal& part);
|
||||
static StringVal ParseUrlKey(FunctionContext*, const StringVal& url,
|
||||
const StringVal& key, const StringVal& part);
|
||||
static void ParseUrlClose(FunctionContext*, FunctionContext::FunctionStateScope);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -40,11 +40,11 @@ const StringSearch UrlParser::colon_search(&colon);
|
||||
const StringSearch UrlParser::question_search(&question);
|
||||
const StringSearch UrlParser::hash_search(&hash);
|
||||
|
||||
bool UrlParser::ParseUrl(const StringValue* url, UrlPart part, StringValue* result) {
|
||||
bool UrlParser::ParseUrl(const StringValue& url, UrlPart part, StringValue* result) {
|
||||
result->ptr = NULL;
|
||||
result->len = 0;
|
||||
// Remove leading and trailing spaces.
|
||||
StringValue trimmed_url = url->Trim();
|
||||
StringValue trimmed_url = url.Trim();
|
||||
|
||||
// All parts require checking for the protocol.
|
||||
int32_t protocol_pos = protocol_search.Search(&trimmed_url);
|
||||
@@ -157,17 +157,17 @@ bool UrlParser::ParseUrl(const StringValue* url, UrlPart part, StringValue* resu
|
||||
return true;
|
||||
}
|
||||
|
||||
bool UrlParser::ParseUrlKey(const StringValue* url, UrlPart part,
|
||||
const StringValue* key, StringValue* result) {
|
||||
bool UrlParser::ParseUrlKey(const StringValue& url, UrlPart part,
|
||||
const StringValue& key, StringValue* result) {
|
||||
// Part must be query to ask for a specific query key.
|
||||
if (part != QUERY) {
|
||||
return false;
|
||||
}
|
||||
// Remove leading and trailing spaces.
|
||||
StringValue trimmed_url = url->Trim();
|
||||
StringValue trimmed_url = url.Trim();
|
||||
|
||||
// Search for the key in the url, ignoring malformed URLs for now.
|
||||
StringSearch key_search(key);
|
||||
StringSearch key_search(&key);
|
||||
while(trimmed_url.len > 0) {
|
||||
// Search for the key in the current substring.
|
||||
int32_t key_pos = key_search.Search(&trimmed_url);
|
||||
@@ -182,7 +182,7 @@ bool UrlParser::ParseUrlKey(const StringValue* url, UrlPart part,
|
||||
match = false;
|
||||
}
|
||||
// Advance substring beyond matching key.
|
||||
trimmed_url = trimmed_url.Substring(key_pos + key->len);
|
||||
trimmed_url = trimmed_url.Substring(key_pos + key.len);
|
||||
if (!match) {
|
||||
continue;
|
||||
}
|
||||
@@ -211,41 +211,41 @@ bool UrlParser::ParseUrlKey(const StringValue* url, UrlPart part,
|
||||
return false;
|
||||
}
|
||||
|
||||
UrlParser::UrlPart UrlParser::GetUrlPart(const StringValue* part) {
|
||||
UrlParser::UrlPart UrlParser::GetUrlPart(const StringValue& part) {
|
||||
// Quick filter on requested URL part, based on first character.
|
||||
// Hive requires the requested URL part to be all upper case.
|
||||
switch(part->ptr[0]) {
|
||||
switch(part.ptr[0]) {
|
||||
case 'A': {
|
||||
if (!part->Eq(url_authority)) return INVALID;
|
||||
if (!part.Eq(url_authority)) return INVALID;
|
||||
return AUTHORITY;
|
||||
}
|
||||
case 'F': {
|
||||
if (!part->Eq(url_file)) return INVALID;
|
||||
if (!part.Eq(url_file)) return INVALID;
|
||||
return FILE;
|
||||
}
|
||||
case 'H': {
|
||||
if (!part->Eq(url_host)) return INVALID;
|
||||
if (!part.Eq(url_host)) return INVALID;
|
||||
return HOST;
|
||||
}
|
||||
case 'P': {
|
||||
if (part->Eq(url_path)) {
|
||||
if (part.Eq(url_path)) {
|
||||
return PATH;
|
||||
} else if (part->Eq(url_protocol)) {
|
||||
} else if (part.Eq(url_protocol)) {
|
||||
return PROTOCOL;
|
||||
} else {
|
||||
return INVALID;
|
||||
}
|
||||
}
|
||||
case 'Q': {
|
||||
if (!part->Eq(url_query)) return INVALID;
|
||||
if (!part.Eq(url_query)) return INVALID;
|
||||
return QUERY;
|
||||
}
|
||||
case 'R': {
|
||||
if (!part->Eq(url_ref)) return INVALID;
|
||||
if (!part.Eq(url_ref)) return INVALID;
|
||||
return REF;
|
||||
}
|
||||
case 'U': {
|
||||
if (!part->Eq(url_userinfo)) return INVALID;
|
||||
if (!part.Eq(url_userinfo)) return INVALID;
|
||||
return USERINFO;
|
||||
}
|
||||
default: return INVALID;
|
||||
|
||||
@@ -56,18 +56,18 @@ class UrlParser {
|
||||
// Tries to parse the part from url. Places the result in result.
|
||||
// Returns false if the URL is malformed or if part is invalid. True otherwise.
|
||||
// If false is returned, the contents of result are undefined.
|
||||
static bool ParseUrl(const StringValue* url, UrlPart part, StringValue* result);
|
||||
static bool ParseUrl(const StringValue& url, UrlPart part, StringValue* result);
|
||||
|
||||
// Tries to parse key from url. Places the result in result.
|
||||
// Returns false if the URL is malformed or if part is invalid. True otherwise.
|
||||
// If false is returned, the contents of result are undefined.
|
||||
static bool ParseUrlKey(const StringValue* url, UrlPart part, const StringValue* key,
|
||||
static bool ParseUrlKey(const StringValue& url, UrlPart part, const StringValue& key,
|
||||
StringValue* result);
|
||||
|
||||
// Compares part against url_authority, url_file, url_host, etc.,
|
||||
// and returns the corresponding enum.
|
||||
// If part did not match any of the url part constants, returns INVALID.
|
||||
static UrlPart GetUrlPart(const StringValue* part);
|
||||
static UrlPart GetUrlPart(const StringValue& part);
|
||||
|
||||
private:
|
||||
// Constants representing parts of a URL.
|
||||
|
||||
@@ -53,71 +53,6 @@ def symbol(class_name, fn_name, templated_type = None):
|
||||
#
|
||||
# The symbol can be empty for functions that are not yet implemented.
|
||||
functions = [
|
||||
# String builtin functions
|
||||
[['substr', 'substring'], 'STRING', ['STRING', 'INT'],
|
||||
symbol('StringFunctions', 'Substring', 'int32_t')],
|
||||
[['substr', 'substring'], 'STRING', ['STRING', 'BIGINT'],
|
||||
symbol('StringFunctions', 'Substring', 'int64_t')],
|
||||
[['substr', 'substring'], 'STRING', ['STRING', 'INT', 'INT'],
|
||||
symbol('StringFunctions', 'Substring', 'int32_t')],
|
||||
[['substr', 'substring'], 'STRING', ['STRING', 'BIGINT', 'BIGINT'],
|
||||
symbol('StringFunctions', 'Substring', 'int64_t')],
|
||||
# left and right are key words, leave them out for now.
|
||||
[['strleft'], 'STRING', ['STRING', 'INT'],
|
||||
symbol('StringFunctions', 'Left', 'int32_t')],
|
||||
[['strleft'], 'STRING', ['STRING', 'BIGINT'],
|
||||
symbol('StringFunctions', 'Left', 'int64_t')],
|
||||
[['strright'], 'STRING', ['STRING', 'INT'],
|
||||
symbol('StringFunctions', 'Right', 'int32_t')],
|
||||
[['strright'], 'STRING', ['STRING', 'BIGINT'],
|
||||
symbol('StringFunctions', 'Right', 'int64_t')],
|
||||
[['space'], 'STRING', ['INT'], symbol('StringFunctions', 'Space', 'int32_t')],
|
||||
[['space'], 'STRING', ['BIGINT'], symbol('StringFunctions', 'Space', 'int64_t')],
|
||||
[['repeat'], 'STRING', ['STRING', 'INT'],
|
||||
symbol('StringFunctions', 'Repeat', 'int32_t')],
|
||||
[['repeat'], 'STRING', ['STRING', 'BIGINT'],
|
||||
symbol('StringFunctions', 'Repeat', 'int64_t')],
|
||||
[['lpad'], 'STRING', ['STRING', 'INT', 'STRING'],
|
||||
symbol('StringFunctions', 'Lpad', 'int32_t')],
|
||||
[['lpad'], 'STRING', ['STRING', 'BIGINT', 'STRING'],
|
||||
symbol('StringFunctions', 'Lpad', 'int64_t')],
|
||||
[['rpad'], 'STRING', ['STRING', 'INT', 'STRING'],
|
||||
symbol('StringFunctions', 'Rpad', 'int32_t')],
|
||||
[['rpad'], 'STRING', ['STRING', 'BIGINT', 'STRING'],
|
||||
symbol('StringFunctions', 'Rpad', 'int64_t')],
|
||||
[['length'], 'INT', ['STRING'], symbol('StringFunctions', 'Length')],
|
||||
[['char_length'], 'INT', ['STRING'], symbol('StringFunctions', 'Length')],
|
||||
[['character_length'], 'INT', ['STRING'], symbol('StringFunctions', 'Length')],
|
||||
[['lower', 'lcase'], 'STRING', ['STRING'], symbol('StringFunctions', 'Lower')],
|
||||
[['upper', 'ucase'], 'STRING', ['STRING'], symbol('StringFunctions', 'Upper')],
|
||||
[['initcap'], 'STRING', ['STRING'], symbol('StringFunctions', 'InitCap')],
|
||||
[['reverse'], 'STRING', ['STRING'], symbol('StringFunctions', 'Reverse')],
|
||||
[['translate'], 'STRING', ['STRING', 'STRING', 'STRING'],
|
||||
symbol('StringFunctions', 'Translate')],
|
||||
[['trim'], 'STRING', ['STRING'], symbol('StringFunctions', 'Trim')],
|
||||
[['ltrim'], 'STRING', ['STRING'], symbol('StringFunctions', 'Ltrim')],
|
||||
[['rtrim'], 'STRING', ['STRING'], symbol('StringFunctions', 'Rtrim')],
|
||||
[['ascii'], 'INT', ['STRING'], symbol('StringFunctions', 'Ascii')],
|
||||
[['instr'], 'INT', ['STRING', 'STRING'], symbol('StringFunctions', 'Instr')],
|
||||
[['locate'], 'INT', ['STRING', 'STRING'], symbol('StringFunctions', 'Locate')],
|
||||
[['locate'], 'INT', ['STRING', 'STRING', 'INT'],
|
||||
symbol('StringFunctions', 'LocatePos', 'int32_t')],
|
||||
[['locate'], 'INT', ['STRING', 'STRING', 'BIGINT'],
|
||||
symbol('StringFunctions', 'LocatePos', 'int64_t')],
|
||||
[['regexp_extract'], 'STRING', ['STRING', 'STRING', 'INT'],
|
||||
symbol('StringFunctions', 'RegexpExtract', 'int32_t')],
|
||||
[['regexp_extract'], 'STRING', ['STRING', 'STRING', 'BIGINT'],
|
||||
symbol('StringFunctions', 'RegexpExtract', 'int64_t')],
|
||||
[['regexp_replace'], 'STRING', ['STRING', 'STRING', 'STRING'],
|
||||
symbol('StringFunctions', 'RegexpReplace')],
|
||||
[['concat'], 'STRING', ['STRING', '...'], symbol('StringFunctions', 'Concat')],
|
||||
[['concat_ws'], 'STRING', ['STRING', 'STRING', '...'],
|
||||
symbol('StringFunctions', 'ConcatWs')],
|
||||
[['find_in_set'], 'INT', ['STRING', 'STRING'], symbol('StringFunctions', 'FindInSet')],
|
||||
[['parse_url'], 'STRING', ['STRING', 'STRING'], symbol('StringFunctions', 'ParseUrl')],
|
||||
[['parse_url'], 'STRING', ['STRING', 'STRING', 'STRING'],
|
||||
symbol('StringFunctions', 'ParseUrlKey')],
|
||||
|
||||
# Utility functions
|
||||
[['current_database'], 'STRING', [], symbol('UtilityFunctions', 'CurrentDatabase')],
|
||||
[['user'], 'STRING', [], symbol('UtilityFunctions', 'User')],
|
||||
@@ -557,4 +492,52 @@ udf_functions = [
|
||||
'impala::DecimalFunctions::TruncateTo'],
|
||||
[['truncate'], 'DECIMAL', ['DECIMAL', 'BIGINT'],
|
||||
'impala::DecimalFunctions::TruncateTo'],
|
||||
|
||||
# String builtin functions
|
||||
[['substr', 'substring'], 'STRING', ['STRING', 'BIGINT'],
|
||||
'impala::StringFunctions::Substring'],
|
||||
[['substr', 'substring'], 'STRING', ['STRING', 'BIGINT', 'BIGINT'],
|
||||
'impala::StringFunctions::Substring'],
|
||||
# left and right are key words, leave them out for now.
|
||||
[['strleft'], 'STRING', ['STRING', 'BIGINT'], 'impala::StringFunctions::Left'],
|
||||
[['strright'], 'STRING', ['STRING', 'BIGINT'], 'impala::StringFunctions::Right'],
|
||||
[['space'], 'STRING', ['BIGINT'], 'impala::StringFunctions::Space'],
|
||||
[['repeat'], 'STRING', ['STRING', 'BIGINT'], 'impala::StringFunctions::Repeat'],
|
||||
[['lpad'], 'STRING', ['STRING', 'BIGINT', 'STRING'], 'impala::StringFunctions::Lpad'],
|
||||
[['rpad'], 'STRING', ['STRING', 'BIGINT', 'STRING'], 'impala::StringFunctions::Rpad'],
|
||||
[['length'], 'INT', ['STRING'], 'impala::StringFunctions::Length'],
|
||||
[['char_length'], 'INT', ['STRING'], 'impala::StringFunctions::Length'],
|
||||
[['character_length'], 'INT', ['STRING'], 'impala::StringFunctions::Length'],
|
||||
[['lower', 'lcase'], 'STRING', ['STRING'], 'impala::StringFunctions::Lower'],
|
||||
[['upper', 'ucase'], 'STRING', ['STRING'], 'impala::StringFunctions::Upper'],
|
||||
[['initcap'], 'STRING', ['STRING'], 'impala::StringFunctions::InitCap'],
|
||||
[['reverse'], 'STRING', ['STRING'], 'impala::StringFunctions::Reverse'],
|
||||
[['translate'], 'STRING', ['STRING', 'STRING', 'STRING'],
|
||||
'impala::StringFunctions::Translate'],
|
||||
[['trim'], 'STRING', ['STRING'], 'impala::StringFunctions::Trim'],
|
||||
[['ltrim'], 'STRING', ['STRING'], 'impala::StringFunctions::Ltrim'],
|
||||
[['rtrim'], 'STRING', ['STRING'], 'impala::StringFunctions::Rtrim'],
|
||||
[['ascii'], 'INT', ['STRING'], 'impala::StringFunctions::Ascii'],
|
||||
[['instr'], 'INT', ['STRING', 'STRING'], 'impala::StringFunctions::Instr'],
|
||||
[['locate'], 'INT', ['STRING', 'STRING'], 'impala::StringFunctions::Locate'],
|
||||
[['locate'], 'INT', ['STRING', 'STRING', 'BIGINT'],
|
||||
'impala::StringFunctions::LocatePos'],
|
||||
[['regexp_extract'], 'STRING', ['STRING', 'STRING', 'BIGINT'],
|
||||
'impala::StringFunctions::RegexpExtract',
|
||||
'_ZN6impala15StringFunctions13RegexpPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
|
||||
'_ZN6impala15StringFunctions11RegexpCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
|
||||
[['regexp_replace'], 'STRING', ['STRING', 'STRING', 'STRING'],
|
||||
'impala::StringFunctions::RegexpReplace',
|
||||
'_ZN6impala15StringFunctions13RegexpPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
|
||||
'_ZN6impala15StringFunctions11RegexpCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
|
||||
[['concat'], 'STRING', ['STRING', '...'], 'impala::StringFunctions::Concat'],
|
||||
[['concat_ws'], 'STRING', ['STRING', 'STRING', '...'],
|
||||
'impala::StringFunctions::ConcatWs'],
|
||||
[['find_in_set'], 'INT', ['STRING', 'STRING'], 'impala::StringFunctions::FindInSet'],
|
||||
[['parse_url'], 'STRING', ['STRING', 'STRING'], 'impala::StringFunctions::ParseUrl',
|
||||
'_ZN6impala15StringFunctions15ParseUrlPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
|
||||
'_ZN6impala15StringFunctions13ParseUrlCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
|
||||
[['parse_url'], 'STRING', ['STRING', 'STRING', 'STRING'], 'impala::StringFunctions::ParseUrlKey',
|
||||
'_ZN6impala15StringFunctions15ParseUrlPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
|
||||
'_ZN6impala15StringFunctions13ParseUrlCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user