Convert string functions to UDF interface

This also switches to using the re2 library for regular expression
functions instead of boost.

Change-Id: I4c3ae72ff2f7cbd5b1a2be719275f1b2e25f8ab2
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/3412
Reviewed-by: Skye Wanderman-Milne <skye@cloudera.com>
Tested-by: Skye Wanderman-Milne <skye@cloudera.com>
This commit is contained in:
Skye Wanderman-Milne
2014-07-09 10:44:53 -07:00
committed by Nong Li
parent 7a0cc27fd1
commit f062a22997
10 changed files with 531 additions and 641 deletions

View File

@@ -61,6 +61,7 @@ set(IR_DEPENDENT_FILES
../exprs/expr-ir.cc
../exprs/math-functions.cc
../exprs/operators.cc
../exprs/string-functions.cc
../exprs/udf-builtins.cc
../exprs/like-predicate.cc
../runtime/string-value-ir.cc

View File

@@ -29,6 +29,7 @@
#include "exprs/expr-ir.cc"
#include "exprs/math-functions.cc"
#include "exprs/operators.cc"
#include "exprs/string-functions.cc"
#include "exprs/udf-builtins.cc"
#include "runtime/string-value-ir.cc"
#include "udf/udf.cc"

View File

@@ -157,6 +157,10 @@ class AnyValUtil {
}
}
// Builds a std::string from the v.len bytes that start at v.ptr.
static std::string ToString(const StringVal& v) {
  const char* bytes = reinterpret_cast<const char*>(v.ptr);
  return std::string(bytes, v.len);
}
// Converts s to a StringVal by handing its character buffer and size to FromBuffer().
// NOTE(review): FromBuffer() is not visible here; presumably it copies the bytes into
// memory obtained through ctx -- confirm before relying on the lifetime of s.
static StringVal FromString(FunctionContext* ctx, const std::string& s) {
  const char* bytes = s.c_str();
  const std::string::size_type num_bytes = s.size();
  return FromBuffer(ctx, bytes, num_bytes);
}

View File

@@ -1438,6 +1438,7 @@ TEST_F(ExprTest, StringFunctions) {
TestStringValue("repeat('', cast(6 as bigint))", "");
TestStringValue("repeat('ab', 0)", "");
TestStringValue("repeat('ab', -1)", "");
TestStringValue("repeat('ab', -100)", "");
TestStringValue("repeat('ab', 1)", "ab");
TestStringValue("repeat('ab', cast(6 as bigint))", "abababababab");
TestIsNull("repeat(NULL, 6)", TYPE_STRING);
@@ -1581,8 +1582,8 @@ TEST_F(ExprTest, StringRegexpFunctions) {
// Test finding of leftmost maximal match.
TestStringValue("regexp_extract('I001=-200,I003=-210,I007=0', 'I001=-?[0-9]+', 0)",
"I001=-200");
// Invalid regex patter, unmatched parenthesis.
TestIsNull("regexp_extract('abxcy1234a', '(/.', 0)", TYPE_STRING);
// Invalid regex pattern, unmatched parenthesis.
TestError("regexp_extract('abxcy1234a', '(/.', 0)");
// NULL arguments.
TestIsNull("regexp_extract(NULL, 'a.x', 2)", TYPE_STRING);
TestIsNull("regexp_extract('abxcy1234a', NULL, 2)", TYPE_STRING);
@@ -1602,8 +1603,8 @@ TEST_F(ExprTest, StringRegexpFunctions) {
TestStringValue("regexp_replace('', 'err', '')", "");
TestStringValue("regexp_replace('', '', 'abc')", "abc");
TestStringValue("regexp_replace('axcaycazc', '', 'r')", "rarxrcraryrcrarzrcr");
// Invalid regex patter, unmatched parenthesis.
TestIsNull("regexp_replace('abxcy1234a', '(/.', 'x')", TYPE_STRING);
// Invalid regex pattern, unmatched parenthesis.
TestError("regexp_replace('abxcy1234a', '(/.', 'x')");
// NULL arguments.
TestIsNull("regexp_replace(NULL, 'a.*', 'abcde')", TYPE_STRING);
TestIsNull("regexp_replace('axcaycazc', NULL, 'abcde')", TYPE_STRING);
@@ -1810,30 +1811,30 @@ TEST_F(ExprTest, StringParseUrlFunction) {
// Invalid part parameters.
// All characters in the part parameter must be uppercase (consistent with Hive).
TestIsNull("parse_url('http://example.com', 'authority')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'Authority')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'AUTHORITYXYZ')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'file')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'File')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'FILEXYZ')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'host')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'Host')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'HOSTXYZ')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'path')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'Path')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'PATHXYZ')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'protocol')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'Protocol')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'PROTOCOLXYZ')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'query')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'Query')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'QUERYXYZ')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'ref')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'Ref')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'REFXYZ')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'userinfo')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'Userinfo')", TYPE_STRING);
TestIsNull("parse_url('http://example.com', 'USERINFOXYZ')", TYPE_STRING);
TestError("parse_url('http://example.com', 'authority')");
TestError("parse_url('http://example.com', 'Authority')");
TestError("parse_url('http://example.com', 'AUTHORITYXYZ')");
TestError("parse_url('http://example.com', 'file')");
TestError("parse_url('http://example.com', 'File')");
TestError("parse_url('http://example.com', 'FILEXYZ')");
TestError("parse_url('http://example.com', 'host')");
TestError("parse_url('http://example.com', 'Host')");
TestError("parse_url('http://example.com', 'HOSTXYZ')");
TestError("parse_url('http://example.com', 'path')");
TestError("parse_url('http://example.com', 'Path')");
TestError("parse_url('http://example.com', 'PATHXYZ')");
TestError("parse_url('http://example.com', 'protocol')");
TestError("parse_url('http://example.com', 'Protocol')");
TestError("parse_url('http://example.com', 'PROTOCOLXYZ')");
TestError("parse_url('http://example.com', 'query')");
TestError("parse_url('http://example.com', 'Query')");
TestError("parse_url('http://example.com', 'QUERYXYZ')");
TestError("parse_url('http://example.com', 'ref')");
TestError("parse_url('http://example.com', 'Ref')");
TestError("parse_url('http://example.com', 'REFXYZ')");
TestError("parse_url('http://example.com', 'userinfo')");
TestError("parse_url('http://example.com', 'Userinfo')");
TestError("parse_url('http://example.com', 'USERINFOXYZ')");
// NULL arguments.
TestIsNull("parse_url(NULL, 'AUTHORITY')", TYPE_STRING);
@@ -1888,8 +1889,8 @@ TEST_F(ExprTest, StringParseUrlFunction) {
"index.html?test=true&name=networking&op=true', 'PROTOCOL', 'name')", TYPE_STRING);
TestIsNull("parse_url('http://example.com:80/docs/books/tutorial/"
"index.html?test=true&name=networking&op=true', 'REF', 'name')", TYPE_STRING);
TestIsNull("parse_url('http://example.com:80/docs/books/tutorial/"
"index.html?test=true&name=networking&op=true', 'XYZ', 'name')", TYPE_STRING);
TestError("parse_url('http://example.com:80/docs/books/tutorial/"
"index.html?test=true&name=networking&op=true', 'XYZ', 'name')");
}
TEST_F(ExprTest, UtilityFunctions) {

View File

@@ -1022,7 +1022,7 @@ void Expr::InitBuiltinsDummy() {
DecimalOperators::CastToDecimalVal(NULL, DecimalVal::null());
MathFunctions::Pi(NULL);
Operators::Add_IntVal_IntVal(NULL, IntVal::null(), IntVal::null());
StringFunctions::Length(NULL, NULL);
StringFunctions::Length(NULL, StringVal::null());
TimestampFunctions::Year(NULL, TimestampVal::null());
UdfBuiltins::Pi(NULL);
UtilityFunctions::Pid(NULL, NULL);

File diff suppressed because it is too large Load Diff

View File

@@ -19,6 +19,8 @@
#include "runtime/string-value.h"
#include "runtime/string-search.h"
using namespace impala_udf;
namespace impala {
class Expr;
@@ -27,33 +29,51 @@ class TupleRow;
// Static entry points for the string builtins.
//
// Two families of declarations are listed:
//  - entry points taking (Expr*, TupleRow*) and returning an untyped void*, and
//  - entry points taking a FunctionContext* plus impala_udf value types
//    (StringVal, IntVal, BigIntVal) and returning a typed value.
class StringFunctions {
 public:
  // --- Expr/TupleRow entry points -------------------------------------------------
  // NOTE(review): the template parameter T presumably selects the integer width of
  // the numeric argument(s) (int32_t / int64_t) -- confirm against the definitions.
  template <class T> static void* Substring(Expr* e, TupleRow* row);
  template <class T> static void* Left(Expr* e, TupleRow* row);
  template <class T> static void* Right(Expr* e, TupleRow* row);
  template <class T> static void* Space(Expr* e, TupleRow* row);
  template <class T> static void* Repeat(Expr* e, TupleRow* row);
  template <class T> static void* Lpad(Expr* e, TupleRow* row);
  template <class T> static void* Rpad(Expr* e, TupleRow* row);
  static void* Length(Expr* e, TupleRow* row);
  static void* Lower(Expr* e, TupleRow* row);
  static void* Upper(Expr* e, TupleRow* row);
  static void* InitCap(Expr* e, TupleRow* row);
  static void* Reverse(Expr* e, TupleRow* row);
  static void* Translate(Expr* e, TupleRow* row);
  static void* Trim(Expr* e, TupleRow* row);
  static void* Ltrim(Expr* e, TupleRow* row);
  static void* Rtrim(Expr* e, TupleRow* row);
  static void* Ascii(Expr* e, TupleRow* row);
  static void* Instr(Expr* e, TupleRow* row);
  static void* Locate(Expr* e, TupleRow* row);
  template <class T> static void* LocatePos(Expr* e, TupleRow* row);
  template <class T> static void* RegexpExtract(Expr* e, TupleRow* row);
  static void* RegexpReplace(Expr* e, TupleRow* row);
  static void* Concat(Expr* e, TupleRow* row);
  static void* ConcatWs(Expr* e, TupleRow* row);
  static void* FindInSet(Expr* e, TupleRow* row);
  static void* ParseUrl(Expr* e, TupleRow* row);
  static void* ParseUrlKey(Expr* e, TupleRow* row);

  // --- FunctionContext entry points -----------------------------------------------
  // Substring has a three-argument form (str, pos, len) and a two-argument form
  // (str, pos).
  static StringVal Substring(FunctionContext*, const StringVal& str, const BigIntVal& pos,
      const BigIntVal& len);
  static StringVal Substring(FunctionContext*, const StringVal& str, const BigIntVal& pos);
  static StringVal Left(FunctionContext*, const StringVal& str, const BigIntVal& len);
  static StringVal Right(FunctionContext*, const StringVal& str, const BigIntVal& len);
  static StringVal Space(FunctionContext*, const BigIntVal& len);
  static StringVal Repeat(FunctionContext*, const StringVal& str, const BigIntVal& n);
  // Lpad and Rpad share the same parameter list: (str, len, pad).
  static StringVal Lpad(FunctionContext*, const StringVal& str, const BigIntVal& len,
      const StringVal& pad);
  static StringVal Rpad(FunctionContext*, const StringVal& str, const BigIntVal& len,
      const StringVal& pad);
  static IntVal Length(FunctionContext*, const StringVal& str);
  static StringVal Lower(FunctionContext*, const StringVal& str);
  static StringVal Upper(FunctionContext*, const StringVal& str);
  static StringVal InitCap(FunctionContext*, const StringVal& str);
  static StringVal Reverse(FunctionContext*, const StringVal& str);
  static StringVal Translate(FunctionContext*, const StringVal& str, const StringVal& src,
      const StringVal& dst);
  static StringVal Trim(FunctionContext*, const StringVal& str);
  static StringVal Ltrim(FunctionContext*, const StringVal& str);
  static StringVal Rtrim(FunctionContext*, const StringVal& str);
  static IntVal Ascii(FunctionContext*, const StringVal& str);
  // Note the differing argument order: Instr takes (str, substr) whereas Locate and
  // LocatePos take (substr, str).
  static IntVal Instr(FunctionContext*, const StringVal& str, const StringVal& substr);
  static IntVal Locate(FunctionContext*, const StringVal& substr, const StringVal& str);
  static IntVal LocatePos(FunctionContext*, const StringVal& substr, const StringVal& str,
      const BigIntVal& start_pos);

  // RegexpPrepare/RegexpClose take a FunctionStateScope and are declared alongside the
  // two regexp functions below.
  // NOTE(review): presumably they set up and tear down per-scope regex state shared by
  // RegexpExtract and RegexpReplace -- confirm against the definitions.
  static void RegexpPrepare(FunctionContext*, FunctionContext::FunctionStateScope);
  static void RegexpClose(FunctionContext*, FunctionContext::FunctionStateScope);
  static StringVal RegexpExtract(FunctionContext*, const StringVal& str,
      const StringVal& pattern, const BigIntVal& index);
  static StringVal RegexpReplace(FunctionContext*, const StringVal& str,
      const StringVal& pattern, const StringVal& replace);

  // Variadic functions: strs points at num_children StringVal arguments.
  static StringVal Concat(FunctionContext*, int num_children, const StringVal* strs);
  static StringVal ConcatWs(FunctionContext*, const StringVal& sep, int num_children,
      const StringVal* strs);
  static IntVal FindInSet(FunctionContext*, const StringVal& str,
      const StringVal& str_set);

  // ParseUrlPrepare/ParseUrlClose take a FunctionStateScope and bracket the two
  // parse_url entry points.
  // NOTE(review): what state they manage is not visible from this declaration.
  static void ParseUrlPrepare(FunctionContext*, FunctionContext::FunctionStateScope);
  static StringVal ParseUrl(FunctionContext*, const StringVal& url, const StringVal& part);
  // Arguments follow the SQL call order parse_url(url, part, key): the second argument
  // is the URL part and the third is the query key.
  static StringVal ParseUrlKey(FunctionContext*, const StringVal& url,
      const StringVal& part, const StringVal& key);
  static void ParseUrlClose(FunctionContext*, FunctionContext::FunctionStateScope);
};
}

View File

@@ -40,11 +40,11 @@ const StringSearch UrlParser::colon_search(&colon);
const StringSearch UrlParser::question_search(&question);
const StringSearch UrlParser::hash_search(&hash);
bool UrlParser::ParseUrl(const StringValue* url, UrlPart part, StringValue* result) {
bool UrlParser::ParseUrl(const StringValue& url, UrlPart part, StringValue* result) {
result->ptr = NULL;
result->len = 0;
// Remove leading and trailing spaces.
StringValue trimmed_url = url->Trim();
StringValue trimmed_url = url.Trim();
// All parts require checking for the protocol.
int32_t protocol_pos = protocol_search.Search(&trimmed_url);
@@ -157,17 +157,17 @@ bool UrlParser::ParseUrl(const StringValue* url, UrlPart part, StringValue* resu
return true;
}
bool UrlParser::ParseUrlKey(const StringValue* url, UrlPart part,
const StringValue* key, StringValue* result) {
bool UrlParser::ParseUrlKey(const StringValue& url, UrlPart part,
const StringValue& key, StringValue* result) {
// Part must be query to ask for a specific query key.
if (part != QUERY) {
return false;
}
// Remove leading and trailing spaces.
StringValue trimmed_url = url->Trim();
StringValue trimmed_url = url.Trim();
// Search for the key in the url, ignoring malformed URLs for now.
StringSearch key_search(key);
StringSearch key_search(&key);
while(trimmed_url.len > 0) {
// Search for the key in the current substring.
int32_t key_pos = key_search.Search(&trimmed_url);
@@ -182,7 +182,7 @@ bool UrlParser::ParseUrlKey(const StringValue* url, UrlPart part,
match = false;
}
// Advance substring beyond matching key.
trimmed_url = trimmed_url.Substring(key_pos + key->len);
trimmed_url = trimmed_url.Substring(key_pos + key.len);
if (!match) {
continue;
}
@@ -211,41 +211,41 @@ bool UrlParser::ParseUrlKey(const StringValue* url, UrlPart part,
return false;
}
UrlParser::UrlPart UrlParser::GetUrlPart(const StringValue* part) {
UrlParser::UrlPart UrlParser::GetUrlPart(const StringValue& part) {
// Quick filter on requested URL part, based on first character.
// Hive requires the requested URL part to be all upper case.
switch(part->ptr[0]) {
switch(part.ptr[0]) {
case 'A': {
if (!part->Eq(url_authority)) return INVALID;
if (!part.Eq(url_authority)) return INVALID;
return AUTHORITY;
}
case 'F': {
if (!part->Eq(url_file)) return INVALID;
if (!part.Eq(url_file)) return INVALID;
return FILE;
}
case 'H': {
if (!part->Eq(url_host)) return INVALID;
if (!part.Eq(url_host)) return INVALID;
return HOST;
}
case 'P': {
if (part->Eq(url_path)) {
if (part.Eq(url_path)) {
return PATH;
} else if (part->Eq(url_protocol)) {
} else if (part.Eq(url_protocol)) {
return PROTOCOL;
} else {
return INVALID;
}
}
case 'Q': {
if (!part->Eq(url_query)) return INVALID;
if (!part.Eq(url_query)) return INVALID;
return QUERY;
}
case 'R': {
if (!part->Eq(url_ref)) return INVALID;
if (!part.Eq(url_ref)) return INVALID;
return REF;
}
case 'U': {
if (!part->Eq(url_userinfo)) return INVALID;
if (!part.Eq(url_userinfo)) return INVALID;
return USERINFO;
}
default: return INVALID;

View File

@@ -56,18 +56,18 @@ class UrlParser {
// Tries to parse the part from url. Places the result in result.
// Returns false if the URL is malformed or if part is invalid. True otherwise.
// If false is returned, the contents of result are undefined.
static bool ParseUrl(const StringValue* url, UrlPart part, StringValue* result);
static bool ParseUrl(const StringValue& url, UrlPart part, StringValue* result);
// Tries to parse key from url. Places the result in result.
// Returns false if the URL is malformed or if part is invalid. True otherwise.
// If false is returned, the contents of result are undefined.
static bool ParseUrlKey(const StringValue* url, UrlPart part, const StringValue* key,
static bool ParseUrlKey(const StringValue& url, UrlPart part, const StringValue& key,
StringValue* result);
// Compares part against url_authority, url_file, url_host, etc.,
// and returns the corresponding enum.
// If part did not match any of the url part constants, returns INVALID.
static UrlPart GetUrlPart(const StringValue* part);
static UrlPart GetUrlPart(const StringValue& part);
private:
// Constants representing parts of a URL.

View File

@@ -53,71 +53,6 @@ def symbol(class_name, fn_name, templated_type = None):
#
# The symbol can be empty for functions that are not yet implemented.
functions = [
# String builtin functions
[['substr', 'substring'], 'STRING', ['STRING', 'INT'],
symbol('StringFunctions', 'Substring', 'int32_t')],
[['substr', 'substring'], 'STRING', ['STRING', 'BIGINT'],
symbol('StringFunctions', 'Substring', 'int64_t')],
[['substr', 'substring'], 'STRING', ['STRING', 'INT', 'INT'],
symbol('StringFunctions', 'Substring', 'int32_t')],
[['substr', 'substring'], 'STRING', ['STRING', 'BIGINT', 'BIGINT'],
symbol('StringFunctions', 'Substring', 'int64_t')],
# left and right are key words, leave them out for now.
[['strleft'], 'STRING', ['STRING', 'INT'],
symbol('StringFunctions', 'Left', 'int32_t')],
[['strleft'], 'STRING', ['STRING', 'BIGINT'],
symbol('StringFunctions', 'Left', 'int64_t')],
[['strright'], 'STRING', ['STRING', 'INT'],
symbol('StringFunctions', 'Right', 'int32_t')],
[['strright'], 'STRING', ['STRING', 'BIGINT'],
symbol('StringFunctions', 'Right', 'int64_t')],
[['space'], 'STRING', ['INT'], symbol('StringFunctions', 'Space', 'int32_t')],
[['space'], 'STRING', ['BIGINT'], symbol('StringFunctions', 'Space', 'int64_t')],
[['repeat'], 'STRING', ['STRING', 'INT'],
symbol('StringFunctions', 'Repeat', 'int32_t')],
[['repeat'], 'STRING', ['STRING', 'BIGINT'],
symbol('StringFunctions', 'Repeat', 'int64_t')],
[['lpad'], 'STRING', ['STRING', 'INT', 'STRING'],
symbol('StringFunctions', 'Lpad', 'int32_t')],
[['lpad'], 'STRING', ['STRING', 'BIGINT', 'STRING'],
symbol('StringFunctions', 'Lpad', 'int64_t')],
[['rpad'], 'STRING', ['STRING', 'INT', 'STRING'],
symbol('StringFunctions', 'Rpad', 'int32_t')],
[['rpad'], 'STRING', ['STRING', 'BIGINT', 'STRING'],
symbol('StringFunctions', 'Rpad', 'int64_t')],
[['length'], 'INT', ['STRING'], symbol('StringFunctions', 'Length')],
[['char_length'], 'INT', ['STRING'], symbol('StringFunctions', 'Length')],
[['character_length'], 'INT', ['STRING'], symbol('StringFunctions', 'Length')],
[['lower', 'lcase'], 'STRING', ['STRING'], symbol('StringFunctions', 'Lower')],
[['upper', 'ucase'], 'STRING', ['STRING'], symbol('StringFunctions', 'Upper')],
[['initcap'], 'STRING', ['STRING'], symbol('StringFunctions', 'InitCap')],
[['reverse'], 'STRING', ['STRING'], symbol('StringFunctions', 'Reverse')],
[['translate'], 'STRING', ['STRING', 'STRING', 'STRING'],
symbol('StringFunctions', 'Translate')],
[['trim'], 'STRING', ['STRING'], symbol('StringFunctions', 'Trim')],
[['ltrim'], 'STRING', ['STRING'], symbol('StringFunctions', 'Ltrim')],
[['rtrim'], 'STRING', ['STRING'], symbol('StringFunctions', 'Rtrim')],
[['ascii'], 'INT', ['STRING'], symbol('StringFunctions', 'Ascii')],
[['instr'], 'INT', ['STRING', 'STRING'], symbol('StringFunctions', 'Instr')],
[['locate'], 'INT', ['STRING', 'STRING'], symbol('StringFunctions', 'Locate')],
[['locate'], 'INT', ['STRING', 'STRING', 'INT'],
symbol('StringFunctions', 'LocatePos', 'int32_t')],
[['locate'], 'INT', ['STRING', 'STRING', 'BIGINT'],
symbol('StringFunctions', 'LocatePos', 'int64_t')],
[['regexp_extract'], 'STRING', ['STRING', 'STRING', 'INT'],
symbol('StringFunctions', 'RegexpExtract', 'int32_t')],
[['regexp_extract'], 'STRING', ['STRING', 'STRING', 'BIGINT'],
symbol('StringFunctions', 'RegexpExtract', 'int64_t')],
[['regexp_replace'], 'STRING', ['STRING', 'STRING', 'STRING'],
symbol('StringFunctions', 'RegexpReplace')],
[['concat'], 'STRING', ['STRING', '...'], symbol('StringFunctions', 'Concat')],
[['concat_ws'], 'STRING', ['STRING', 'STRING', '...'],
symbol('StringFunctions', 'ConcatWs')],
[['find_in_set'], 'INT', ['STRING', 'STRING'], symbol('StringFunctions', 'FindInSet')],
[['parse_url'], 'STRING', ['STRING', 'STRING'], symbol('StringFunctions', 'ParseUrl')],
[['parse_url'], 'STRING', ['STRING', 'STRING', 'STRING'],
symbol('StringFunctions', 'ParseUrlKey')],
# Utility functions
[['current_database'], 'STRING', [], symbol('UtilityFunctions', 'CurrentDatabase')],
[['user'], 'STRING', [], symbol('UtilityFunctions', 'User')],
@@ -557,4 +492,52 @@ udf_functions = [
'impala::DecimalFunctions::TruncateTo'],
[['truncate'], 'DECIMAL', ['DECIMAL', 'BIGINT'],
'impala::DecimalFunctions::TruncateTo'],
# String builtin functions
[['substr', 'substring'], 'STRING', ['STRING', 'BIGINT'],
'impala::StringFunctions::Substring'],
[['substr', 'substring'], 'STRING', ['STRING', 'BIGINT', 'BIGINT'],
'impala::StringFunctions::Substring'],
# left and right are key words, leave them out for now.
[['strleft'], 'STRING', ['STRING', 'BIGINT'], 'impala::StringFunctions::Left'],
[['strright'], 'STRING', ['STRING', 'BIGINT'], 'impala::StringFunctions::Right'],
[['space'], 'STRING', ['BIGINT'], 'impala::StringFunctions::Space'],
[['repeat'], 'STRING', ['STRING', 'BIGINT'], 'impala::StringFunctions::Repeat'],
[['lpad'], 'STRING', ['STRING', 'BIGINT', 'STRING'], 'impala::StringFunctions::Lpad'],
[['rpad'], 'STRING', ['STRING', 'BIGINT', 'STRING'], 'impala::StringFunctions::Rpad'],
[['length'], 'INT', ['STRING'], 'impala::StringFunctions::Length'],
[['char_length'], 'INT', ['STRING'], 'impala::StringFunctions::Length'],
[['character_length'], 'INT', ['STRING'], 'impala::StringFunctions::Length'],
[['lower', 'lcase'], 'STRING', ['STRING'], 'impala::StringFunctions::Lower'],
[['upper', 'ucase'], 'STRING', ['STRING'], 'impala::StringFunctions::Upper'],
[['initcap'], 'STRING', ['STRING'], 'impala::StringFunctions::InitCap'],
[['reverse'], 'STRING', ['STRING'], 'impala::StringFunctions::Reverse'],
[['translate'], 'STRING', ['STRING', 'STRING', 'STRING'],
'impala::StringFunctions::Translate'],
[['trim'], 'STRING', ['STRING'], 'impala::StringFunctions::Trim'],
[['ltrim'], 'STRING', ['STRING'], 'impala::StringFunctions::Ltrim'],
[['rtrim'], 'STRING', ['STRING'], 'impala::StringFunctions::Rtrim'],
[['ascii'], 'INT', ['STRING'], 'impala::StringFunctions::Ascii'],
[['instr'], 'INT', ['STRING', 'STRING'], 'impala::StringFunctions::Instr'],
[['locate'], 'INT', ['STRING', 'STRING'], 'impala::StringFunctions::Locate'],
[['locate'], 'INT', ['STRING', 'STRING', 'BIGINT'],
'impala::StringFunctions::LocatePos'],
[['regexp_extract'], 'STRING', ['STRING', 'STRING', 'BIGINT'],
'impala::StringFunctions::RegexpExtract',
'_ZN6impala15StringFunctions13RegexpPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
'_ZN6impala15StringFunctions11RegexpCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
[['regexp_replace'], 'STRING', ['STRING', 'STRING', 'STRING'],
'impala::StringFunctions::RegexpReplace',
'_ZN6impala15StringFunctions13RegexpPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
'_ZN6impala15StringFunctions11RegexpCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
[['concat'], 'STRING', ['STRING', '...'], 'impala::StringFunctions::Concat'],
[['concat_ws'], 'STRING', ['STRING', 'STRING', '...'],
'impala::StringFunctions::ConcatWs'],
[['find_in_set'], 'INT', ['STRING', 'STRING'], 'impala::StringFunctions::FindInSet'],
[['parse_url'], 'STRING', ['STRING', 'STRING'], 'impala::StringFunctions::ParseUrl',
'_ZN6impala15StringFunctions15ParseUrlPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
'_ZN6impala15StringFunctions13ParseUrlCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
[['parse_url'], 'STRING', ['STRING', 'STRING', 'STRING'], 'impala::StringFunctions::ParseUrlKey',
'_ZN6impala15StringFunctions15ParseUrlPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
'_ZN6impala15StringFunctions13ParseUrlCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
]