mirror of
https://github.com/apache/impala.git
synced 2025-12-30 12:02:10 -05:00
Adds a new ExprRewriteRule for replacing constant expressions with their literal equivalent via BE evaluation. Applies the new rule together with the existing ones on the parse tree, after analysis. Limitations - Constant folding is applied on the unresolved expressions. As a result, it only works for expressions that are constant within a single query block, as opposed to expressions that may become constant after fully substituting inline-view exprs. - Exprs are not normalized, so some opportunities for constant folding are missed for certain expr-tree shapes. This patch includes the following interesting changes: - Introduces a timestamp literal that can only be produced by constant folding (not expressible directly via SQL). - To make sure that rewrites have no user-visible effect, the original result types and column labels of the top-level statement are restored after the rewrites are performed. - Does not fold exprs if their evaluation resulted in a warning or error, or if the resulting value is not representable by corresponding FE LiteralExpr. - Fixes an existing issue with converting strings between the FE/BE. String produced in the BE that have characters with a value > 127 are not correctly deserialized into a Java String via thrift. We detect this case during constant folding and abandon folding of such exprs. - Fixes several issues with detecting/reporting errors in NativeEvalConstExprs(). - Cleans up ExprContext::GetValue() into ExprContext::GetConstantValue() which clarifies its only use of evaluating exprs from the FE. Testing: - Modifies expr-test.cc to run all tests through the constant folding path. - Adds basic planner and rewrite rule tests. - Exhaustive test run passed Change-Id: If672b703db1ba0bfc26e5b9130161798b40a69e9 Reviewed-on: http://gerrit.cloudera.org:8080/5109 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: Internal Jenkins
541 lines
9.9 KiB
Plaintext
541 lines
9.9 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Test identity functions
|
|
select identity(true);
|
|
---- TYPES
|
|
boolean
|
|
---- RESULTS
|
|
true
|
|
====
|
|
---- QUERY
|
|
select identity(cast(10 as tinyint));
|
|
---- TYPES
|
|
tinyint
|
|
---- RESULTS
|
|
10
|
|
====
|
|
---- QUERY
|
|
select identity(cast(10 as smallint));
|
|
---- TYPES
|
|
smallint
|
|
---- RESULTS
|
|
10
|
|
====
|
|
---- QUERY
|
|
select identity(cast(10 as int));
|
|
---- TYPES
|
|
int
|
|
---- RESULTS
|
|
10
|
|
====
|
|
---- QUERY
|
|
select identity(cast(10 as bigint));
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
10
|
|
====
|
|
---- QUERY
|
|
select identity(cast(10.0 as float));
|
|
---- TYPES
|
|
float
|
|
---- RESULTS
|
|
10
|
|
====
|
|
---- QUERY
|
|
select identity(cast(10.0 as double));
|
|
---- TYPES
|
|
double
|
|
---- RESULTS
|
|
10
|
|
====
|
|
---- QUERY
|
|
select identity("why hello there");
|
|
---- TYPES
|
|
string
|
|
---- RESULTS
|
|
'why hello there'
|
|
====
|
|
---- QUERY
|
|
select identity(now());
|
|
---- TYPES
|
|
timestamp
|
|
====
|
|
---- QUERY
|
|
select identity(cast(1 as decimal(9,0)));
|
|
---- TYPES
|
|
decimal
|
|
---- RESULTS
|
|
1
|
|
====
|
|
---- QUERY
|
|
select identity(cast(1 as decimal(18,1)));
|
|
---- TYPES
|
|
decimal
|
|
---- RESULTS
|
|
1.0
|
|
====
|
|
---- QUERY
|
|
select identity(cast(1 as decimal(38,10)));
|
|
---- TYPES
|
|
decimal
|
|
---- RESULTS
|
|
1.0000000000
|
|
====
|
|
---- QUERY
|
|
select identity(NULL);
|
|
---- TYPES
|
|
boolean
|
|
---- RESULTS
|
|
NULL
|
|
====
|
|
---- QUERY
|
|
select constant_timestamp();
|
|
---- TYPES
|
|
timestamp
|
|
---- RESULTS
|
|
2013-10-09 00:00:00.000000001
|
|
====
|
|
---- QUERY
|
|
select from_utc_timestamp(constant_timestamp(), "UTC");
|
|
---- TYPES
|
|
timestamp
|
|
---- RESULTS
|
|
2013-10-09 00:00:00.000000001
|
|
====
|
|
---- QUERY
|
|
# Test UDFs with different arguments
|
|
select all_types_fn("1", true, 2, 3, 4, 5, 6.0, 7.0, cast(8 as decimal(2,0)));
|
|
---- TYPES
|
|
int
|
|
---- RESULTS
|
|
37
|
|
====
|
|
---- QUERY
|
|
select no_args();
|
|
---- TYPES
|
|
string
|
|
---- RESULTS
|
|
'string'
|
|
====
|
|
---- QUERY
|
|
# Test UDFs over tables
|
|
select sum(identity(bigint_col)) from functional.alltypes
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
328500
|
|
====
|
|
---- QUERY
|
|
select identity(a) from functional.tinytable;
|
|
---- TYPES
|
|
string
|
|
---- RESULTS
|
|
'aaaaaaa'
|
|
'ccccc'
|
|
'eeeeeeee'
|
|
====
|
|
---- QUERY
|
|
select identity(d1),
|
|
identity(cast(d3 as decimal(38,10))), identity(cast(d5 as decimal(38,10)))
|
|
from functional.decimal_tbl;
|
|
---- TYPES
|
|
decimal,decimal,decimal
|
|
---- RESULTS
|
|
1234,1.2345678900,12345.7890000000
|
|
2345,12.3456789000,3.1410000000
|
|
12345,123.4567890000,11.2200000000
|
|
12345,1234.5678900000,0.1000000000
|
|
132842,12345.6789000000,0.7788900000
|
|
====
|
|
---- QUERY
|
|
select identity(bool_col), identity(tinyint_col),
|
|
identity(smallint_col), identity(int_col),
|
|
identity(bigint_col), identity(float_col),
|
|
identity(double_col), identity(string_col),
|
|
identity(timestamp_col), identity(year)
|
|
from functional.alltypestiny;
|
|
---- TYPES
|
|
boolean, tinyint, smallint, int, bigint, float, double, string, timestamp, int
|
|
---- RESULTS
|
|
true,0,0,0,0,0,0,'0',2009-02-01 00:00:00,2009
|
|
false,1,1,1,10,1.100000023841858,10.1,'1',2009-02-01 00:01:00,2009
|
|
true,0,0,0,0,0,0,'0',2009-04-01 00:00:00,2009
|
|
false,1,1,1,10,1.100000023841858,10.1,'1',2009-04-01 00:01:00,2009
|
|
true,0,0,0,0,0,0,'0',2009-01-01 00:00:00,2009
|
|
false,1,1,1,10,1.100000023841858,10.1,'1',2009-01-01 00:01:00,2009
|
|
true,0,0,0,0,0,0,'0',2009-03-01 00:00:00,2009
|
|
false,1,1,1,10,1.100000023841858,10.1,'1',2009-03-01 00:01:00,2009
|
|
====
|
|
---- QUERY
|
|
select sum(all_types_fn(
|
|
string_col, bool_col, tinyint_col, smallint_col,
|
|
int_col, bigint_col, float_col, double_col, cast(tinyint_col as decimal(2,0))))
|
|
from functional.alltypes;
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
# Verify with 'select sum(length(string_col)) + sum(cast(bool_col as int))
|
|
# + sum(tinyint_col) + sum(smallint_col) + sum(int_col) + sum(bigint_col)
|
|
# + sum(cast(float_col as bigint)) + sum(cast(double_col as bigint)) + sum(tinyint_col)
|
|
# from functional.alltypes;'
|
|
832200
|
|
====
|
|
---- QUERY
|
|
select no_args() from functional.alltypes limit 1;
|
|
---- TYPES
|
|
string
|
|
---- RESULTS
|
|
'string'
|
|
====
|
|
---- QUERY
|
|
# Chain UDFs/exprs together to test glue
|
|
select identity(no_args());
|
|
---- TYPES
|
|
string
|
|
---- RESULTS
|
|
'string'
|
|
====
|
|
---- QUERY
|
|
select identity(cast(identity(3.0) as bigint));
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
3
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional.alltypessmall having identity(count(*)) > 1
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
100
|
|
====
|
|
---- QUERY
|
|
select count(identity(id)) from functional.alltypessmall
|
|
having identity(count(*)) > 1
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
100
|
|
====
|
|
---- QUERY
|
|
select count(identity(id)) from functional.alltypessmall
|
|
group by identity(int_col)
|
|
having identity(count(*)) > 10
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
12
|
|
12
|
|
12
|
|
12
|
|
12
|
|
====
|
|
---- QUERY
|
|
select identity(a.tinyint_col),
|
|
identity(b.id),
|
|
identity(a.string_col)
|
|
from functional.alltypesagg a join functional.alltypessmall b on
|
|
(identity(a.tinyint_col) = identity(b.id))
|
|
and identity(a.tinyint_col + b.tinyint_col) < 5
|
|
where identity(a.month) = identity(1)
|
|
and identity(a.day) = identity(1)
|
|
and identity(a.string_col) > identity('88')
|
|
and identity(b.bool_col) = identity(false)
|
|
order by identity(a.string_col)
|
|
limit 5
|
|
---- TYPES
|
|
tinyint, int, string
|
|
---- RESULTS
|
|
1,1,'881'
|
|
1,1,'891'
|
|
1,1,'901'
|
|
1,1,'91'
|
|
1,1,'911'
|
|
====
|
|
---- QUERY
|
|
select identity(int_col),
|
|
identity(min(identity(bool_col))),
|
|
identity(max(identity(tinyint_col))),
|
|
identity(max(identity(smallint_col))),
|
|
identity(max(identity(int_col))),
|
|
identity(max(identity(bigint_col))),
|
|
identity(max(identity(float_col))),
|
|
identity(max(identity(double_col))),
|
|
identity(max(identity(string_col))),
|
|
identity(max(identity(timestamp_col)))
|
|
from functional.alltypesagg
|
|
where identity(identity(tinyint_col) > identity(1))
|
|
group by identity(int_col)
|
|
having identity(identity(int_col) > identity(998))
|
|
---- TYPES
|
|
int,boolean,tinyint,smallint,int,bigint,float,double,string,timestamp
|
|
---- RESULTS
|
|
999,false,9,99,999,9990,1098.900024414062,10089.9,'999',2010-01-10 18:02:05.100000000
|
|
====
|
|
---- QUERY
|
|
select identity(year),
|
|
identity(min(identity(month))),
|
|
identity(min(string_col)),
|
|
identity(max(timestamp_col))
|
|
from functional.alltypesagg group by identity(year)
|
|
having identity(identity(year) = identity(2010))
|
|
---- TYPES
|
|
int,int,string,timestamp
|
|
---- RESULTS
|
|
2010,1,'0',2010-01-10 18:02:05.100000000
|
|
====
|
|
---- QUERY
|
|
select min(identity(int_col)) from functional.alltypesagg where int_col is null;
|
|
---- TYPES
|
|
int
|
|
---- RESULTS
|
|
NULL
|
|
====
|
|
---- QUERY
|
|
select var_sum(NULL, NULL, NULL)
|
|
---- TYPES
|
|
int
|
|
---- RESULTS
|
|
NULL
|
|
====
|
|
---- QUERY
|
|
select var_and(true, false, true)
|
|
---- TYPES
|
|
boolean
|
|
---- RESULTS
|
|
false
|
|
====
|
|
---- QUERY
|
|
select var_and(true, true, true, true, true)
|
|
---- TYPES
|
|
boolean
|
|
---- RESULTS
|
|
true
|
|
====
|
|
---- QUERY
|
|
select var_sum(1, 2, 3, 4, 5, 6)
|
|
---- TYPES
|
|
int
|
|
---- RESULTS
|
|
21
|
|
====
|
|
---- QUERY
|
|
select var_sum(1.0, 2.0, 3.0)
|
|
---- TYPES
|
|
decimal
|
|
---- RESULTS
|
|
6.00
|
|
====
|
|
---- QUERY
|
|
select var_sum("Hello", "World", "Foo", "Bar")
|
|
---- TYPES
|
|
int
|
|
---- RESULTS
|
|
16
|
|
====
|
|
---- QUERY
|
|
select var_sum(cast(1 as decimal(4,2)), cast(2 as decimal(4,2)), cast(3 as decimal(4,2)));
|
|
---- TYPES
|
|
decimal
|
|
---- RESULTS
|
|
6.00
|
|
====
|
|
---- QUERY
|
|
# More complicated arguments
|
|
select var_sum(
|
|
cast(1 as decimal(4,2)), cast(2 as decimal(4,2)),
|
|
cast(3 as decimal(3,2)) + cast("1.1" as decimal(3,2)));
|
|
---- TYPES
|
|
decimal
|
|
---- RESULTS
|
|
7.10
|
|
====
|
|
---- QUERY
|
|
select tinyint_col, int_col, var_sum(tinyint_col, int_col)
|
|
from functional.alltypestiny
|
|
---- TYPES
|
|
tinyint, int, int
|
|
---- RESULTS
|
|
0,0,0
|
|
1,1,2
|
|
0,0,0
|
|
1,1,2
|
|
0,0,0
|
|
1,1,2
|
|
0,0,0
|
|
1,1,2
|
|
====
|
|
---- QUERY
|
|
select var_sum_multiply(NULL, 1, 2)
|
|
---- TYPES
|
|
double
|
|
---- RESULTS
|
|
NULL
|
|
====
|
|
---- QUERY
|
|
select var_sum_multiply(1.0, 1, 2, NULL, 3)
|
|
---- TYPES
|
|
double
|
|
---- RESULTS
|
|
6
|
|
====
|
|
---- QUERY
|
|
select var_sum_multiply(5.0, 1, 2, 3, 4, 5, 6)
|
|
---- TYPES
|
|
double
|
|
---- RESULTS
|
|
105
|
|
====
|
|
---- QUERY
|
|
select var_sum_multiply2(5.0, 1, 2, 3, 4, 5, 6)
|
|
---- TYPES
|
|
double
|
|
---- RESULTS
|
|
105
|
|
====
|
|
---- QUERY
|
|
select to_lower("HELLO")
|
|
---- TYPES
|
|
string
|
|
---- RESULTS
|
|
'hello'
|
|
====
|
|
---- QUERY
|
|
select tinyint_col, int_col, var_sum_multiply(2, tinyint_col, int_col)
|
|
from functional.alltypestiny
|
|
---- TYPES
|
|
tinyint, int, double
|
|
---- RESULTS
|
|
0,0,0
|
|
1,1,4
|
|
0,0,0
|
|
1,1,4
|
|
0,0,0
|
|
1,1,4
|
|
0,0,0
|
|
1,1,4
|
|
====
|
|
---- QUERY
|
|
# Test UDFs that are evaluated in the planner (doesn't take cols as input)
|
|
# and returns a string.
|
|
select count(*) from functional.alltypessmall where No_Args() = 'string'
|
|
---- TYPES
|
|
BIGINT
|
|
---- RESULTS
|
|
100
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional.alltypessmall where No_Args() != 'string'
|
|
---- TYPES
|
|
BIGINT
|
|
---- RESULTS
|
|
0
|
|
====
|
|
---- QUERY
|
|
select validate_arg_type("dummy")
|
|
---- TYPES
|
|
BOOLEAN
|
|
---- RESULTS
|
|
true
|
|
====
|
|
---- QUERY
|
|
select constant_arg(1), constant_arg(int_col) from functional.alltypestiny limit 1;
|
|
---- TYPES
|
|
int,int
|
|
---- RESULTS
|
|
1,NULL
|
|
====
|
|
---- QUERY
|
|
# Test applying a UDF on a partition column predicate (IMPALA-887)
|
|
select * from functional.alltypestiny where identity(year) = 2009 and identity(month) = 1;
|
|
---- RESULTS
|
|
0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1
|
|
1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1
|
|
---- TYPES
|
|
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP, INT, INT
|
|
====
|
|
---- QUERY
|
|
select mem_test(100);
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
100
|
|
====
|
|
---- QUERY
|
|
select mem_test_leaks(100);
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
100
|
|
---- ERRORS
|
|
# TODO: this should print a warning, but we can't retrieve errors from the runtime state
|
|
# log if they're added while the query is closing.
|
|
====
|
|
---- QUERY
|
|
# Make sure rand() is non-constant
|
|
select constant_arg(cast(rand() as int));
|
|
---- TYPES
|
|
INT
|
|
---- RESULTS
|
|
NULL
|
|
====
|
|
---- QUERY
|
|
select four_args(1,2,3,4);
|
|
---- TYPES
|
|
INT
|
|
---- RESULTS
|
|
10
|
|
====
|
|
---- QUERY
|
|
select five_args(1,2,3,4,5);
|
|
---- TYPES
|
|
INT
|
|
---- RESULTS
|
|
15
|
|
====
|
|
---- QUERY
|
|
select six_args(1,2,3,4,5,6);
|
|
---- TYPES
|
|
INT
|
|
---- RESULTS
|
|
21
|
|
====
|
|
---- QUERY
|
|
select seven_args(1,2,3,4,5,6,7);
|
|
---- TYPES
|
|
INT
|
|
---- RESULTS
|
|
28
|
|
====
|
|
---- QUERY
|
|
select eight_args(1,2,3,4,5,6,7,8);
|
|
---- TYPES
|
|
INT
|
|
---- RESULTS
|
|
36
|
|
====
|
|
---- QUERY
|
|
select twenty_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20);
|
|
---- TYPES
|
|
INT
|
|
---- RESULTS
|
|
210
|
|
====
|
|
---- QUERY
|
|
# Disable expr rewrites (constant folding) because count_rows() is non-deterministic.
|
|
set enable_expr_rewrites=false;
|
|
select count_rows() from functional.alltypestiny;
|
|
---- TYPES
|
|
BIGINT
|
|
---- RESULTS
|
|
1
|
|
2
|
|
3
|
|
4
|
|
5
|
|
6
|
|
7
|
|
8
|
|
====
|