mirror of
https://github.com/apache/impala.git
synced 2026-01-05 21:00:54 -05:00
Previously, CopyStringVal() mistakenly copies a null
StringVal as an empty string (i.e. a non-null string
with zero length). This change fixes the problem by
distinguishing between these two cases in CopyStringVal()
and handles them properly. Also added a test case for it.
This problem only started showing up recently due to
commit 51268c053f which
calls CopyStringVal() in OffsetFnInit(). All other
pre-existing callers of CopyStringVal() before that
commit checks if 'src' is null before calling it so
the problem never showed up. In that sense, this is
a latent bug exposed by the aforementioned commit.
Change-Id: I3a5b9349dd08556eba5cfedc8c0063cc59f5be03
Reviewed-on: http://gerrit.cloudera.org:8080/5198
Reviewed-by: Michael Ho <kwho@cloudera.com>
Tested-by: Internal Jenkins
1920 lines
56 KiB
Plaintext
1920 lines
56 KiB
Plaintext
====
|
|
---- QUERY
|
|
select year, month,
|
|
count(int_col) over (partition by year, month),
|
|
avg(int_col) over (partition by year, month),
|
|
avg(timestamp_col) over (partition by year, month),
|
|
min(string_col) over (partition by year, month),
|
|
max(string_col) over (partition by year, month)
|
|
from alltypessmall
|
|
where id % 4 = 0 and month != 1;
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
|
|
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
|
|
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
|
|
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
|
|
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
|
|
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
|
|
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
|
|
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
|
|
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
|
|
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
|
|
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
|
|
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
|
|
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
|
|
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
|
|
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
|
|
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
|
|
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
|
|
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
|
|
---- TYPES
|
|
INT, INT, BIGINT, DOUBLE, TIMESTAMP, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
select int_col,
|
|
count(int_col) over ()
|
|
from alltypessmall
|
|
where id % 4 = 0 and month = 1
|
|
order by int_col
|
|
---- RESULTS
|
|
0,7
|
|
0,7
|
|
2,7
|
|
4,7
|
|
4,7
|
|
6,7
|
|
8,7
|
|
---- TYPES
|
|
INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
select tinyint_col,
|
|
count(int_col) over (order by tinyint_col)
|
|
from alltypessmall
|
|
where month = 1 and tinyint_col < 5
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,3
|
|
0,3
|
|
0,3
|
|
1,6
|
|
1,6
|
|
1,6
|
|
2,9
|
|
2,9
|
|
2,9
|
|
3,12
|
|
3,12
|
|
3,12
|
|
4,15
|
|
4,15
|
|
4,15
|
|
---- TYPES
|
|
TINYINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test ordering expressions having NULL values
|
|
select tinyint_col, smallint_col, int_col,
|
|
sum(smallint_col) over (order by tinyint_col nulls first)
|
|
from alltypesagg
|
|
where year = 2010 and month = 1 and day = 4 and id <= 3030 and bool_col = true
|
|
order by tinyint_col, id, smallint_col, int_col
|
|
---- RESULTS
|
|
2,2,2,96
|
|
2,12,12,96
|
|
2,22,22,96
|
|
4,4,4,138
|
|
4,14,14,138
|
|
4,24,24,138
|
|
6,6,6,186
|
|
6,16,16,186
|
|
6,26,26,186
|
|
8,8,8,240
|
|
8,18,18,240
|
|
8,28,28,240
|
|
NULL,NULL,NULL,60
|
|
NULL,10,10,60
|
|
NULL,20,20,60
|
|
NULL,30,30,60
|
|
---- TYPES
|
|
TINYINT, SMALLINT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test partitions and ordering expressions having NULL values for both
|
|
# the default window and ROWS windows.
|
|
select tinyint_col, smallint_col,
|
|
sum(smallint_col) over (partition by tinyint_col order by smallint_col),
|
|
sum(smallint_col) over (partition by tinyint_col order by smallint_col
|
|
rows between unbounded preceding and 1 following),
|
|
sum(smallint_col) over (partition by tinyint_col order by smallint_col
|
|
rows between 1 following and 2 following)
|
|
from alltypesagg
|
|
where year = 2010 and month = 1 and day = 4 and id <= 3200
|
|
and (smallint_col < 50 or smallint_col is NULL)
|
|
and (tinyint_col = 1 or tinyint_col is NULL)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
1,1,2,2,12
|
|
1,1,2,13,22
|
|
1,11,24,24,32
|
|
1,11,24,45,42
|
|
1,21,66,66,52
|
|
1,21,66,97,62
|
|
1,31,128,128,72
|
|
1,31,128,169,82
|
|
1,41,210,210,41
|
|
1,41,210,210,NULL
|
|
NULL,10,20,20,30
|
|
NULL,10,20,40,40
|
|
NULL,20,60,60,50
|
|
NULL,20,60,90,60
|
|
NULL,30,120,120,70
|
|
NULL,30,120,160,80
|
|
NULL,40,200,200,40
|
|
NULL,40,200,200,NULL
|
|
NULL,NULL,200,200,NULL
|
|
NULL,NULL,200,200,NULL
|
|
NULL,NULL,200,200,NULL
|
|
---- TYPES
|
|
TINYINT, SMALLINT, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test sum() NULL handling with different sliding windows (sum() has special NULL
|
|
# handling in Remove()).
|
|
select
|
|
sum(tinyint_col) over (order by id nulls last rows between 1 preceding and 1 preceding),
|
|
sum(tinyint_col) over (order by id nulls last rows between 1 preceding and current row),
|
|
sum(tinyint_col) over (order by id nulls last rows between current row and 1 following),
|
|
sum(tinyint_col) over (order by id nulls last rows between 1 following and 1 following)
|
|
from alltypesagg where id < 30 and (tinyint_col is null or tinyint_col < 6)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
NULL,NULL,NULL,NULL
|
|
NULL,NULL,1,1
|
|
NULL,1,3,2
|
|
1,3,5,3
|
|
2,5,7,4
|
|
3,7,9,5
|
|
4,9,5,NULL
|
|
5,5,NULL,NULL
|
|
NULL,NULL,1,1
|
|
NULL,1,3,2
|
|
1,3,5,3
|
|
2,5,7,4
|
|
3,7,9,5
|
|
4,9,5,NULL
|
|
5,5,NULL,NULL
|
|
NULL,NULL,1,1
|
|
NULL,1,3,2
|
|
1,3,5,3
|
|
2,5,7,4
|
|
3,7,9,5
|
|
4,9,5,NULL
|
|
---- TYPES
|
|
BIGINT, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test multiple partition exprs and multiple order by exprs
|
|
select year, month, bool_col, tinyint_col,
|
|
count(id) over (partition by year, month order by bool_col, tinyint_col)
|
|
from alltypes
|
|
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
|
|
order by year, month, bool_col, tinyint_col
|
|
---- RESULTS
|
|
2009,1,false,1,4
|
|
2009,1,false,1,4
|
|
2009,1,false,1,4
|
|
2009,1,false,1,4
|
|
2009,1,true,0,8
|
|
2009,1,true,0,8
|
|
2009,1,true,0,8
|
|
2009,1,true,0,8
|
|
2009,2,false,1,2
|
|
2009,2,false,1,2
|
|
2009,2,true,0,4
|
|
2009,2,true,0,4
|
|
2010,1,false,1,3
|
|
2010,1,false,1,3
|
|
2010,1,false,1,3
|
|
2010,1,true,0,6
|
|
2010,1,true,0,6
|
|
2010,1,true,0,6
|
|
2010,2,false,1,3
|
|
2010,2,false,1,3
|
|
2010,2,false,1,3
|
|
2010,2,true,0,6
|
|
2010,2,true,0,6
|
|
2010,2,true,0,6
|
|
---- TYPES
|
|
INT, INT, BOOLEAN, TINYINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Same as above but with no ordering and a limit. Only count the number of results
|
|
# because they are non-deterministic.
|
|
select count(*) from
|
|
(select year, month, bool_col, tinyint_col,
|
|
count(id) over (partition by year, month order by bool_col, tinyint_col)
|
|
from alltypes
|
|
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
|
|
limit 7) v
|
|
---- RESULTS
|
|
7
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Simple ROWS window
|
|
select (month % 2), int_col,
|
|
sum(int_col) over (partition by (month % 2)
|
|
order by int_col rows between unbounded preceding and current row)
|
|
from alltypestiny
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,0,0
|
|
0,0,0
|
|
0,1,1
|
|
0,1,2
|
|
1,0,0
|
|
1,0,0
|
|
1,1,1
|
|
1,1,2
|
|
---- TYPES
|
|
INT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# ROWS windows with an end boundary offset, some before or after all results.
|
|
select
|
|
count(tinyint_col) over (partition by bool_col order by tinyint_col
|
|
rows between unbounded preceding and 1 preceding),
|
|
sum(tinyint_col) over (partition by bool_col order by tinyint_col
|
|
rows between unbounded preceding and 1 following),
|
|
sum(tinyint_col) over (partition by bool_col order by tinyint_col
|
|
rows between unbounded preceding and 3 preceding),
|
|
sum(tinyint_col) over (partition by bool_col order by tinyint_col
|
|
rows between unbounded preceding and 20 following),
|
|
sum(tinyint_col) over (partition by bool_col order by tinyint_col
|
|
rows between unbounded preceding and 20 preceding)
|
|
from alltypesagg where id <= 10 and day = 1
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,4,NULL,25,NULL
|
|
1,9,NULL,25,NULL
|
|
2,16,NULL,25,NULL
|
|
3,25,1,25,NULL
|
|
4,25,4,25,NULL
|
|
0,6,NULL,20,NULL
|
|
1,12,NULL,20,NULL
|
|
2,20,NULL,20,NULL
|
|
3,20,2,20,NULL
|
|
4,20,6,20,NULL
|
|
4,20,12,20,NULL
|
|
---- TYPES
|
|
BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# multiple analytics in the same select block
|
|
select tinyint_col, int_col, bigint_col,
|
|
count(bigint_col)
|
|
over(partition by tinyint_col, bigint_col order by int_col desc
|
|
rows between unbounded preceding and 1 following),
|
|
max(tinyint_col)
|
|
over(partition by bigint_col, tinyint_col order by int_col desc
|
|
rows between unbounded preceding and 1 following),
|
|
# different window
|
|
min(int_col)
|
|
over(partition by bigint_col, tinyint_col order by int_col desc
|
|
rows between unbounded preceding and 2 following),
|
|
# different sort order but same partition
|
|
max(int_col)
|
|
over(partition by bigint_col, tinyint_col order by int_col asc
|
|
rows between unbounded preceding and 2 following),
|
|
# different partition
|
|
sum(int_col)
|
|
over(partition by tinyint_col order by int_col desc
|
|
rows between unbounded preceding and 2 following)
|
|
from functional.alltypesagg
|
|
where id < 20
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
1,1,10,1,1,1,1,12
|
|
1,11,110,1,1,11,11,12
|
|
2,2,20,1,2,2,2,14
|
|
2,12,120,1,2,12,12,14
|
|
3,3,30,1,3,3,3,16
|
|
3,13,130,1,3,13,13,16
|
|
4,4,40,1,4,4,4,18
|
|
4,14,140,1,4,14,14,18
|
|
5,5,50,1,5,5,5,20
|
|
5,15,150,1,5,15,15,20
|
|
6,6,60,1,6,6,6,22
|
|
6,16,160,1,6,16,16,22
|
|
7,7,70,1,7,7,7,24
|
|
7,17,170,1,7,17,17,24
|
|
8,8,80,1,8,8,8,26
|
|
8,18,180,1,8,18,18,26
|
|
9,9,90,1,9,9,9,28
|
|
9,19,190,1,9,19,19,28
|
|
NULL,10,100,2,NULL,10,10,20
|
|
NULL,10,100,2,NULL,10,10,20
|
|
NULL,NULL,NULL,0,NULL,NULL,NULL,10
|
|
NULL,NULL,NULL,0,NULL,NULL,NULL,20
|
|
---- TYPES
|
|
TINYINT, INT, BIGINT, BIGINT, TINYINT, INT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# analytics on a grouped aggregation with a final order by
|
|
select bool_col,
|
|
sum(min(int_col))
|
|
over(partition by min(tinyint_col) order by max(int_col)
|
|
rows between unbounded preceding and 1 following),
|
|
max(sum(bigint_col))
|
|
over(partition by min(tinyint_col) order by max(int_col)
|
|
rows between unbounded preceding and 1 following),
|
|
min(sum(bigint_col))
|
|
over(partition by min(tinyint_col) order by sum(int_col)
|
|
rows between unbounded preceding and 4 following)
|
|
from functional.alltypes
|
|
group by 1
|
|
order by 1, 2, 3
|
|
---- RESULTS
|
|
false,1,182500,182500
|
|
true,0,146000,146000
|
|
---- TYPES
|
|
BOOLEAN, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-1200
|
|
select DISTINCT t1.c1 FROM
|
|
(SELECT MAX(smallint_col) OVER (PARTITION BY year ORDER BY id DESC) AS c1
|
|
FROM alltypesagg) t1
|
|
---- RESULTS
|
|
99
|
|
---- TYPES
|
|
SMALLINT
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-1233
|
|
with with_1 AS (
|
|
SELECT RANK() OVER (ORDER BY t1.id) AS int_col_1
|
|
FROM alltypesagg t1)
|
|
SELECT COUNT(t1.int_col_1) AS int_col_1
|
|
FROM with_1 t1
|
|
WHERE t1.int_col_1 IS NULL
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select tinyint_col, id,
|
|
row_number() over (order by tinyint_col, id) total_order,
|
|
row_number() over (partition by tinyint_col order by id) partitioned_order,
|
|
first_value(id) over (partition by tinyint_col order by id),
|
|
first_value(cast(id as string)) over (partition by tinyint_col order by id),
|
|
last_value(id) over (partition by tinyint_col order by id),
|
|
last_value(cast(id as string)) over (partition by tinyint_col order by id),
|
|
last_value(id) over (partition by tinyint_col order by id
|
|
rows between unbounded preceding and unbounded following)
|
|
from alltypes where id < 25 order by tinyint_col, id
|
|
---- RESULTS
|
|
0,0,1,1,0,'0',0,'0',20
|
|
0,10,2,2,0,'0',10,'10',20
|
|
0,20,3,3,0,'0',20,'20',20
|
|
1,1,4,1,1,'1',1,'1',21
|
|
1,11,5,2,1,'1',11,'11',21
|
|
1,21,6,3,1,'1',21,'21',21
|
|
2,2,7,1,2,'2',2,'2',22
|
|
2,12,8,2,2,'2',12,'12',22
|
|
2,22,9,3,2,'2',22,'22',22
|
|
3,3,10,1,3,'3',3,'3',23
|
|
3,13,11,2,3,'3',13,'13',23
|
|
3,23,12,3,3,'3',23,'23',23
|
|
4,4,13,1,4,'4',4,'4',24
|
|
4,14,14,2,4,'4',14,'14',24
|
|
4,24,15,3,4,'4',24,'24',24
|
|
5,5,16,1,5,'5',5,'5',15
|
|
5,15,17,2,5,'5',15,'15',15
|
|
6,6,18,1,6,'6',6,'6',16
|
|
6,16,19,2,6,'6',16,'16',16
|
|
7,7,20,1,7,'7',7,'7',17
|
|
7,17,21,2,7,'7',17,'17',17
|
|
8,8,22,1,8,'8',8,'8',18
|
|
8,18,23,2,8,'8',18,'18',18
|
|
9,9,24,1,9,'9',9,'9',19
|
|
9,19,25,2,9,'9',19,'19',19
|
|
---- TYPES
|
|
TINYINT, INT, BIGINT, BIGINT, INT, STRING, INT, STRING, INT
|
|
====
|
|
---- QUERY
|
|
select bool_col, id,
|
|
lag(id, 2, -12345) over (partition by bool_col order by id),
|
|
lead(id, 2, -12345) over (partition by bool_col order by id)
|
|
from alltypestiny
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
false,1,-12345,5
|
|
false,3,-12345,7
|
|
false,5,1,-12345
|
|
false,7,3,-12345
|
|
true,0,-12345,4
|
|
true,2,-12345,6
|
|
true,4,0,-12345
|
|
true,6,2,-12345
|
|
---- TYPES
|
|
BOOLEAN, INT, INT, INT
|
|
====
|
|
---- QUERY
|
|
# Tests rank() and dense_rank()
|
|
select year, month, bool_col, tinyint_col,
|
|
rank() over (partition by year, month order by bool_col, tinyint_col),
|
|
dense_rank() over (partition by year, month order by bool_col, tinyint_col)
|
|
from alltypes
|
|
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
|
|
order by year, month, bool_col, tinyint_col
|
|
---- RESULTS
|
|
2009,1,false,1,1,1
|
|
2009,1,false,1,1,1
|
|
2009,1,false,1,1,1
|
|
2009,1,false,1,1,1
|
|
2009,1,true,0,5,2
|
|
2009,1,true,0,5,2
|
|
2009,1,true,0,5,2
|
|
2009,1,true,0,5,2
|
|
2009,2,false,1,1,1
|
|
2009,2,false,1,1,1
|
|
2009,2,true,0,3,2
|
|
2009,2,true,0,3,2
|
|
2010,1,false,1,1,1
|
|
2010,1,false,1,1,1
|
|
2010,1,false,1,1,1
|
|
2010,1,true,0,4,2
|
|
2010,1,true,0,4,2
|
|
2010,1,true,0,4,2
|
|
2010,2,false,1,1,1
|
|
2010,2,false,1,1,1
|
|
2010,2,false,1,1,1
|
|
2010,2,true,0,4,2
|
|
2010,2,true,0,4,2
|
|
2010,2,true,0,4,2
|
|
---- TYPES
|
|
INT, INT, BOOLEAN, TINYINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# analytics with inline views
|
|
select bool_col, a, b, a+b from
|
|
(select
|
|
bool_col,
|
|
count(int_col) over() a,
|
|
sum(int_col + bigint_col) over(partition by bool_col) b
|
|
from
|
|
(select * from functional.alltypes where id < 20) v1) v2
|
|
order by 1, 2, 3
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
---- TYPES
|
|
BOOLEAN, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# analytics and unions
|
|
select 1 a, min(id) over (partition by int_col)
|
|
from functional.alltypes where id < 5
|
|
union distinct
|
|
select 2, max(id) over (partition by bool_col)
|
|
from functional.alltypessmall where id >= 5 and id <= 10
|
|
union all
|
|
(select 3, sum(bigint_col) over (partition by int_col order by id)
|
|
from functional.alltypestiny where bool_col = false)
|
|
order by 1, 2 desc nulls first
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
1,4
|
|
1,3
|
|
1,2
|
|
1,1
|
|
1,0
|
|
2,10
|
|
2,9
|
|
3,40
|
|
3,30
|
|
3,20
|
|
3,10
|
|
---- TYPES
|
|
TINYINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# analytics in an uncorrelated subquery
|
|
select id, int_col, bool_col from functional.alltypestiny t1
|
|
where int_col in
|
|
(select min(bigint_col) over(partition by bool_col)
|
|
from functional.alltypessmall t2 where t2.id < 10)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,0,true
|
|
2,0,true
|
|
4,0,true
|
|
6,0,true
|
|
---- TYPES
|
|
INT, INT, BOOLEAN
|
|
====
|
|
---- QUERY
|
|
# test conjunct evaluation in analytic nodes
|
|
select * from
|
|
(select bool_col, tinyint_col,
|
|
sum(tinyint_col) over (partition by bool_col order by tinyint_col
|
|
rows between unbounded preceding and 1 following) a
|
|
from alltypesagg where id <= 10 and day = 1) v
|
|
where
|
|
v.tinyint_col * 3 > v.a and
|
|
cast(v.a as boolean) = v.bool_col and
|
|
v.a is not null
|
|
---- RESULTS
|
|
true,8,20
|
|
---- TYPES
|
|
BOOLEAN, TINYINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test ROWS windows with start boundaries
|
|
select id,
|
|
count(id) over (order by id rows between 3 preceding and 3 preceding) c1,
|
|
count(id) over (order by id rows between 3 preceding and 2 preceding) c2,
|
|
count(id) over (order by id rows between 3 preceding and 1 preceding) c3,
|
|
count(id) over (order by id rows between 3 preceding and current row) c4,
|
|
count(id) over (order by id rows between 3 preceding and 1 following) c5,
|
|
count(id) over (order by id rows between 3 preceding and 2 following) c6,
|
|
count(id) over (order by id rows between 3 preceding and 3 following) c7,
|
|
count(id) over (order by id rows between 2 preceding and 3 following) c8,
|
|
count(id) over (order by id rows between 1 preceding and 3 following) c9,
|
|
count(id) over (order by id rows between current row and 3 following) c10,
|
|
count(id) over (order by id rows between 1 following and 3 following) c11,
|
|
count(id) over (order by id rows between 2 following and 3 following) c12,
|
|
count(id) over (order by id rows between 3 following and 3 following) c13
|
|
from alltypes where id < 8
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,0,0,0,1,2,3,4,4,4,4,3,2,1
|
|
1,0,0,1,2,3,4,5,5,5,4,3,2,1
|
|
2,0,1,2,3,4,5,6,6,5,4,3,2,1
|
|
3,1,2,3,4,5,6,7,6,5,4,3,2,1
|
|
4,1,2,3,4,5,6,7,6,5,4,3,2,1
|
|
5,1,2,3,4,5,6,6,5,4,3,2,1,0
|
|
6,1,2,3,4,5,5,5,4,3,2,1,0,0
|
|
7,1,2,3,4,4,4,4,3,2,1,0,0,0
|
|
---- TYPES
|
|
INT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test sum() and avg() removing values
|
|
select id,
|
|
sum(int_col) over (order by id rows between 1 preceding and 1 following),
|
|
sum(double_col) over (order by id rows between 3 preceding and 2 preceding),
|
|
avg(int_col) over (order by id rows between 1 preceding and 1 following),
|
|
avg(double_col) over (order by id rows between 3 preceding and 2 preceding),
|
|
avg(timestamp_col) over (order by id rows between 2 following and 3 following)
|
|
from alltypes where id < 8
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,1,NULL,0.5,NULL,2009-01-01 00:02:30.199999809
|
|
1,3,NULL,1,NULL,2009-01-01 00:03:30.449999809
|
|
2,6,0,2,0,2009-01-01 00:04:30.349999904
|
|
3,9,10.1,3,5.05,2009-01-01 00:05:30.124999999
|
|
4,12,30.3,4,15.15,2009-01-01 00:06:30.180000066
|
|
5,15,50.49999999999999,5,25.25,2009-01-01 00:07:00.210000038
|
|
6,18,70.69999999999999,6,35.34999999999999,NULL
|
|
7,13,90.89999999999999,6.5,45.45,NULL
|
|
---- TYPES
|
|
INT, BIGINT, DOUBLE, DOUBLE, DOUBLE, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# More testing of start bounds. This exposed a bug in removing
|
|
# values from the window after the partition.
|
|
select tinyint_col, int_col,
|
|
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 2 following and 3 following),
|
|
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 2 following and 10 following),
|
|
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 6 following and 100 following)
|
|
from alltypesagg
|
|
where (tinyint_col is NULL or tinyint_col < 3) and id < 60;
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
NULL,10,40,280,180
|
|
NULL,10,50,260,140
|
|
NULL,20,60,240,100
|
|
NULL,20,70,210,50
|
|
NULL,30,80,180,NULL
|
|
NULL,30,90,140,NULL
|
|
NULL,40,100,100,NULL
|
|
NULL,40,50,50,NULL
|
|
NULL,50,NULL,NULL,NULL
|
|
NULL,50,NULL,NULL,NULL
|
|
NULL,NULL,NULL,NULL,NULL
|
|
NULL,NULL,NULL,NULL,NULL
|
|
1,1,52,144,NULL
|
|
1,11,72,123,NULL
|
|
1,21,92,92,NULL
|
|
1,31,51,51,NULL
|
|
1,41,NULL,NULL,NULL
|
|
1,51,NULL,NULL,NULL
|
|
2,2,54,148,NULL
|
|
2,12,74,126,NULL
|
|
2,22,94,94,NULL
|
|
2,32,52,52,NULL
|
|
2,42,NULL,NULL,NULL
|
|
2,52,NULL,NULL,NULL
|
|
---- TYPES
|
|
TINYINT, INT, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1273: Incorrect results with very large window and small table
|
|
select
|
|
count(int_col) over (order by bigint_col rows between 95 preceding and 40 following)
|
|
from alltypestiny
|
|
---- RESULTS
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select count(distinct t1.c1) from
|
|
(select count(id) over (order by id rows between 550 preceding and 550 following) c1
|
|
from alltypesagg) t1
|
|
---- RESULTS
|
|
551
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select count(distinct t1.c1) from
|
|
(select count(id) over (order by id rows between 1050 preceding and 2 following) c1
|
|
from alltypesagg) t1
|
|
---- RESULTS
|
|
1051
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select count(distinct t1.c1) from
|
|
(select count(id) over (order by id rows between 1050 preceding and 500 preceding) c1
|
|
from alltypesagg) t1
|
|
---- RESULTS
|
|
552
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select count(distinct t1.c1) from
|
|
(select count(id) over (order by id rows between 2 following and 1050 following) c1
|
|
from alltypesagg) t1
|
|
---- RESULTS
|
|
1050
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select count(distinct t1.c1) from
|
|
(select count(id) over (order by id rows between 500 following and 1050 following) c1
|
|
from alltypesagg) t1
|
|
---- RESULTS
|
|
552
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select (tinyint_col % 4), id,
|
|
count(id) over (partition by (tinyint_col % 4) order by id
|
|
rows between 3 following and 10 following)
|
|
from alltypes where id < 20
|
|
order by (tinyint_col % 4), id
|
|
---- RESULTS
|
|
0,0,3
|
|
0,4,2
|
|
0,8,1
|
|
0,10,0
|
|
0,14,0
|
|
0,18,0
|
|
1,1,3
|
|
1,5,2
|
|
1,9,1
|
|
1,11,0
|
|
1,15,0
|
|
1,19,0
|
|
2,2,1
|
|
2,6,0
|
|
2,12,0
|
|
2,16,0
|
|
3,3,1
|
|
3,7,0
|
|
3,13,0
|
|
3,17,0
|
|
---- TYPES
|
|
TINYINT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test IMPALA-1269: ROWS windows with offset to current row might not remove
|
|
# correct values after new partitions
|
|
select sum(t2.int_col_1) from
|
|
(select SUM(t1.int_col) OVER (PARTITION BY t1.smallint_col ORDER BY t1.day, t1.int_col
|
|
ROWS 7 PRECEDING) AS int_col_1
|
|
FROM alltypesagg t1 ) t2
|
|
---- RESULTS
|
|
41790600
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test FIRST_VALUE with strings and null handling
|
|
select cast(floor(id/3) as int), tinyint_col,
|
|
first_value(tinyint_col) over (partition by cast(floor(id/3) as int) order by tinyint_col nulls first),
|
|
first_value(cast(tinyint_col as string)) over (partition by cast(floor(id/3) as int) order by cast(tinyint_col as string) nulls first),
|
|
last_value(tinyint_col) over (partition by cast(floor(id/3) as int) order by tinyint_col nulls first),
|
|
last_value(cast(tinyint_col as string)) over (partition by cast(floor(id/3) as int) order by cast(tinyint_col as string) nulls first)
|
|
from alltypesagg where id < 50 and (tinyint_col is null or tinyint_col < 4)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,NULL,NULL,'NULL',NULL,'NULL'
|
|
0,NULL,NULL,'NULL',NULL,'NULL'
|
|
0,1,NULL,'NULL',1,'1'
|
|
0,2,NULL,'NULL',2,'2'
|
|
1,3,3,'3',3,'3'
|
|
3,NULL,NULL,'NULL',NULL,'NULL'
|
|
3,NULL,NULL,'NULL',NULL,'NULL'
|
|
3,1,NULL,'NULL',1,'1'
|
|
4,2,2,'2',2,'2'
|
|
4,3,2,'2',3,'3'
|
|
6,NULL,NULL,'NULL',NULL,'NULL'
|
|
6,NULL,NULL,'NULL',NULL,'NULL'
|
|
7,1,1,'1',1,'1'
|
|
7,2,1,'1',2,'2'
|
|
7,3,1,'1',3,'3'
|
|
10,NULL,NULL,'NULL',NULL,'NULL'
|
|
10,NULL,NULL,'NULL',NULL,'NULL'
|
|
10,1,NULL,'NULL',1,'1'
|
|
10,2,NULL,'NULL',2,'2'
|
|
11,3,3,'3',3,'3'
|
|
13,NULL,NULL,'NULL',NULL,'NULL'
|
|
13,NULL,NULL,'NULL',NULL,'NULL'
|
|
13,1,NULL,'NULL',1,'1'
|
|
14,2,2,'2',2,'2'
|
|
14,3,2,'2',3,'3'
|
|
---- TYPES
|
|
INT, TINYINT, TINYINT, STRING, TINYINT, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1302: Incorrect result of FIRST_VALUE query.
|
|
select
|
|
first_value(id) over (order by id rows between 3 preceding and 3 following),
|
|
first_value(id) over (order by id rows between 3 preceding and 2 following),
|
|
first_value(id) over (order by id rows between 3 preceding and 1 following),
|
|
first_value(id) over (order by id rows between 3 preceding and current row),
|
|
first_value(id) over (order by id rows between 3 preceding and 1 preceding),
|
|
first_value(id) over (order by id rows between 3 preceding and 2 preceding),
|
|
first_value(id) over (order by id rows between 3 preceding and 3 preceding),
|
|
first_value(id) over (order by id rows between 2 preceding and 3 following),
|
|
first_value(id) over (order by id rows between 1 preceding and 3 following),
|
|
first_value(id) over (order by id rows between current row and 3 following),
|
|
first_value(id) over (order by id rows between 1 following and 3 following),
|
|
first_value(id) over (order by id rows between 2 following and 3 following),
|
|
first_value(id) over (order by id rows between 3 following and 3 following),
|
|
first_value(id) over (order by id rows between 15 preceding and 15 preceding),
|
|
first_value(id) over (order by id rows between 15 preceding and 15 following),
|
|
first_value(id) over (order by id rows between 15 preceding and 6 preceding),
|
|
first_value(id) over (order by id rows between 6 preceding and 15 following),
|
|
first_value(id) over (order by id rows between 15 following and 15 following),
|
|
first_value(id) over (order by id rows between unbounded preceding and 8 preceding),
|
|
last_value(id) over (order by id rows between 3 preceding and 3 following),
|
|
last_value(id) over (order by id rows between 3 preceding and 2 following),
|
|
last_value(id) over (order by id rows between 3 preceding and 1 following),
|
|
last_value(id) over (order by id rows between 3 preceding and current row),
|
|
last_value(id) over (order by id rows between 3 preceding and 1 preceding),
|
|
last_value(id) over (order by id rows between 3 preceding and 2 preceding),
|
|
last_value(id) over (order by id rows between 3 preceding and 3 preceding),
|
|
last_value(id) over (order by id rows between 2 preceding and 3 following),
|
|
last_value(id) over (order by id rows between 1 preceding and 3 following),
|
|
last_value(id) over (order by id rows between current row and 3 following),
|
|
last_value(id) over (order by id rows between 1 following and 3 following),
|
|
last_value(id) over (order by id rows between 2 following and 3 following),
|
|
last_value(id) over (order by id rows between 3 following and 3 following),
|
|
last_value(id) over (order by id rows between 15 preceding and 15 preceding),
|
|
last_value(id) over (order by id rows between 15 preceding and 15 following),
|
|
last_value(id) over (order by id rows between 15 preceding and 6 preceding),
|
|
last_value(id) over (order by id rows between 6 preceding and 15 following),
|
|
last_value(id) over (order by id rows between 15 following and 15 following),
|
|
last_value(id) over (order by id rows between 8 following and unbounded following)
|
|
from alltypes where id < 10 order by id;
|
|
---- RESULTS
|
|
0,0,0,0,NULL,NULL,NULL,0,0,0,1,2,3,NULL,0,NULL,0,NULL,NULL,3,2,1,0,NULL,NULL,NULL,3,3,3,3,3,3,NULL,9,NULL,9,NULL,9
|
|
0,0,0,0,0,NULL,NULL,0,0,1,2,3,4,NULL,0,NULL,0,NULL,NULL,4,3,2,1,0,NULL,NULL,4,4,4,4,4,4,NULL,9,NULL,9,NULL,9
|
|
0,0,0,0,0,0,NULL,0,1,2,3,4,5,NULL,0,NULL,0,NULL,NULL,5,4,3,2,1,0,NULL,5,5,5,5,5,5,NULL,9,NULL,9,NULL,NULL
|
|
0,0,0,0,0,0,0,1,2,3,4,5,6,NULL,0,NULL,0,NULL,NULL,6,5,4,3,2,1,0,6,6,6,6,6,6,NULL,9,NULL,9,NULL,NULL
|
|
1,1,1,1,1,1,1,2,3,4,5,6,7,NULL,0,NULL,0,NULL,NULL,7,6,5,4,3,2,1,7,7,7,7,7,7,NULL,9,NULL,9,NULL,NULL
|
|
2,2,2,2,2,2,2,3,4,5,6,7,8,NULL,0,NULL,0,NULL,NULL,8,7,6,5,4,3,2,8,8,8,8,8,8,NULL,9,NULL,9,NULL,NULL
|
|
3,3,3,3,3,3,3,4,5,6,7,8,9,NULL,0,0,0,NULL,NULL,9,8,7,6,5,4,3,9,9,9,9,9,9,NULL,9,0,9,NULL,NULL
|
|
4,4,4,4,4,4,4,5,6,7,8,9,NULL,NULL,0,0,1,NULL,NULL,9,9,8,7,6,5,4,9,9,9,9,9,NULL,NULL,9,1,9,NULL,NULL
|
|
5,5,5,5,5,5,5,6,7,8,9,NULL,NULL,NULL,0,0,2,NULL,0,9,9,9,8,7,6,5,9,9,9,9,NULL,NULL,NULL,9,2,9,NULL,NULL
|
|
6,6,6,6,6,6,6,7,8,9,NULL,NULL,NULL,NULL,0,0,3,NULL,0,9,9,9,9,8,7,6,9,9,9,NULL,NULL,NULL,NULL,9,3,9,NULL,NULL
|
|
---- TYPES
|
|
INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT
|
|
====
|
|
---- QUERY
|
|
# Test RANGE with UNBOUNDED FOLLOWING
|
|
select id,
|
|
sum(id) over (order by id range between unbounded preceding and unbounded following)
|
|
from alltypesagg where id <= 30 order by id;
|
|
---- RESULTS
|
|
0,525
|
|
0,525
|
|
1,525
|
|
2,525
|
|
3,525
|
|
4,525
|
|
5,525
|
|
6,525
|
|
7,525
|
|
8,525
|
|
9,525
|
|
10,525
|
|
10,525
|
|
11,525
|
|
12,525
|
|
13,525
|
|
14,525
|
|
15,525
|
|
16,525
|
|
17,525
|
|
18,525
|
|
19,525
|
|
20,525
|
|
20,525
|
|
21,525
|
|
22,525
|
|
23,525
|
|
24,525
|
|
25,525
|
|
26,525
|
|
27,525
|
|
28,525
|
|
29,525
|
|
30,525
|
|
30,525
|
|
---- TYPES
|
|
INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1293: DCHECK failure with window ROWS BETWEEN UNBOUNDED PRECEDING
|
|
# AND <large> PRECEDING
|
|
select count(*), count(t1.c1) from
|
|
(select SUM(tinyint_col) OVER (PARTITION BY bigint_col ORDER BY tinyint_col ASC,
|
|
id ASC, timestamp_col ASC, day ASC
|
|
ROWS BETWEEN UNBOUNDED PRECEDING AND 11 PRECEDING) c1
|
|
FROM alltypesagg) t1
|
|
---- RESULTS
|
|
11000,0
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test RANGE with UNBOUNDED FOLLOWING on many partitions
|
|
select count(distinct t1.c1) from
|
|
(select sum(id) over (partition by day order by tinyint_col
|
|
range between unbounded preceding and unbounded following) c1
|
|
from alltypesagg) t1;
|
|
---- RESULTS
|
|
11
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1296: Fix DCHECK when unnecessary buffered tuple (for sum() analytic) not null
|
|
select
|
|
sum(month) over (),
|
|
dense_rank() over (order by date_string_col)
|
|
from alltypes where id < 5
|
|
---- RESULTS
|
|
5,1
|
|
5,1
|
|
5,1
|
|
5,1
|
|
5,1
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1292: Incorrect result in analytic SUM when ORDER BY column is null
|
|
select tinyint_col, id,
|
|
SUM(id) OVER (ORDER BY tinyint_col ASC, id ASC)
|
|
FROM alltypesagg
|
|
where (tinyint_col is NULL or tinyint_col < 2) and id < 100 order by 1, 2
|
|
---- RESULTS
|
|
1,1,1
|
|
1,11,12
|
|
1,21,33
|
|
1,31,64
|
|
1,41,105
|
|
1,51,156
|
|
1,61,217
|
|
1,71,288
|
|
1,81,369
|
|
1,91,460
|
|
NULL,0,460
|
|
NULL,0,460
|
|
NULL,10,480
|
|
NULL,10,480
|
|
NULL,20,520
|
|
NULL,20,520
|
|
NULL,30,580
|
|
NULL,30,580
|
|
NULL,40,660
|
|
NULL,40,660
|
|
NULL,50,760
|
|
NULL,50,760
|
|
NULL,60,880
|
|
NULL,60,880
|
|
NULL,70,1020
|
|
NULL,70,1020
|
|
NULL,80,1180
|
|
NULL,80,1180
|
|
NULL,90,1360
|
|
NULL,90,1360
|
|
---- TYPES
|
|
TINYINT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1280: Crash running analytic with LEFT SEMI JOIN
|
|
select sum(t1.int_col) over (partition by t1.id order by t1.int_col, t1.month)
|
|
from alltypestiny t1
|
|
where exists (select tt1.month from alltypes tt1 where t1.int_col = tt1.smallint_col)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0
|
|
0
|
|
0
|
|
1
|
|
0
|
|
1
|
|
1
|
|
1
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-1223. Query with analytic query with right join.
|
|
# Need proper handling of NULLs.
|
|
SELECT SUM(t1.int_col) OVER () AS int_col_1 FROM alltypestiny t1
|
|
RIGHT JOIN alltypestiny t2 ON t2.tinyint_col = t1.bigint_col AND t2.id = t1.month;
|
|
---- RESULTS
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
## IMPALA-1312: Fix DCHECK failure in BufferedTupleStream
|
|
## Disabled due to slow execution without codegen, breaks exhaustive runs (IMPALA-1414)
|
|
#SELECT
|
|
# SUM(t1.int_col * t1.id) OVER (
|
|
# ORDER BY t1.int_col * t1.id ASC, t3.year + t3.tinyint_col ASC ROWS
|
|
# BETWEEN 66 PRECEDING AND 21 FOLLOWING) AS int_col_1
|
|
#FROM alltypes t1
|
|
#INNER JOIN alltypes t2 ON t2.month = t1.smallint_col
|
|
#INNER JOIN alltypes t3 ON t3.bigint_col = t2.bigint_col
|
|
#WHERE
|
|
#t2.id <= t3.smallint_col + t2.tinyint_col
|
|
#order by 1
|
|
#limit 5
|
|
#---- TYPES
|
|
#BIGINT
|
|
#---- RESULTS
|
|
#22
|
|
#23
|
|
#24
|
|
#25
|
|
#26
|
|
#====
|
|
#----QUERY
|
|
# Testing crashes in buffered tuple stream related to IMPALA-1306.
|
|
SELECT AVG(t1.int_col) OVER () AS double_col_1
|
|
FROM alltypestiny t1
|
|
WHERE EXISTS
|
|
(SELECT t1.month AS int_col_1
|
|
FROM alltypestiny t1)
|
|
---- RESULTS
|
|
0.5
|
|
0.5
|
|
0.5
|
|
0.5
|
|
0.5
|
|
0.5
|
|
0.5
|
|
0.5
|
|
---- TYPES
|
|
DOUBLE
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1808: handle partition/order by exprs with NaN
|
|
select id / int_col,
|
|
count(*) over (partition by id / int_col order by id),
|
|
count(*) over (order by id / int_col),
|
|
count(*) over (order by id / int_col rows between 100 preceding and 90 preceding),
|
|
count(*) over (order by id / int_col rows between 2 preceding and 1 preceding),
|
|
count(*) over (order by id / int_col rows between 1 preceding and 1 following),
|
|
count(*) over (order by id / int_col rows between 1 following and 2 following),
|
|
count(*) over (order by id / int_col rows between 90 following and 100 following)
|
|
from alltypestiny
|
|
---- RESULTS
|
|
NaN,1,1,0,0,2,2,0
|
|
1,1,2,0,1,3,2,0
|
|
3,1,3,0,2,3,2,0
|
|
5,1,4,0,2,3,2,0
|
|
7,1,5,0,2,3,2,0
|
|
Infinity,1,8,0,2,3,2,0
|
|
Infinity,2,8,0,2,3,1,0
|
|
Infinity,3,8,0,2,2,0,0
|
|
---- TYPES
|
|
DOUBLE, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
|
|
====
|
|
----QUERY
|
|
# IMPALA-1483: Check that TupleIsNullPredicates referring to the logical
|
|
# analytic output are substituted with TupleIsNullPredicates referring to
|
|
# the physical output.
|
|
select t2.a, t2.id, t2.int_col
|
|
from functional.alltypes t1
|
|
left outer join
|
|
(select sum(int_col) over(order by id) a, id, int_col is null as int_col
|
|
from functional.alltypes
|
|
where year = 2009 and month between 1 and 2) t2
|
|
on (t1.id = t2.id)
|
|
where year = 2009 and month = 1
|
|
order by t2.id limit 10
|
|
---- RESULTS
|
|
0,0,false
|
|
1,1,false
|
|
3,2,false
|
|
6,3,false
|
|
10,4,false
|
|
15,5,false
|
|
21,6,false
|
|
28,7,false
|
|
36,8,false
|
|
45,9,false
|
|
---- TYPES
|
|
BIGINT, INT, BOOLEAN
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1542: Analytic fn containing arithmetic expr on NULL crashed
|
|
select lag(coalesce(505, 1 + NULL), 1) over (order by int_col desc)
|
|
from functional.alltypestiny
|
|
---- RESULTS
|
|
NULL
|
|
505
|
|
505
|
|
505
|
|
505
|
|
505
|
|
505
|
|
505
|
|
---- TYPES
|
|
SMALLINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1562: AnalyticEvalNode not properly handling nullable tuple
|
|
select max(t3.c1) from
|
|
(select min(t2.string_col) over () c1
|
|
from alltypesagg t1
|
|
left join alltypes t2 on t2.string_col = t1.string_col ) t3
|
|
---- RESULTS
|
|
'0'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1559: FIRST_VALUE rewrite function intermediate type not matching slot type
|
|
select
|
|
first_value(-32.9) over (order by id rows between 92 preceding and unbounded following),
|
|
first_value(1.1) over (order by id rows between 92 preceding and 1 preceding)
|
|
from alltypestiny
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
-32.9,NULL
|
|
-32.9,1.1
|
|
-32.9,1.1
|
|
-32.9,1.1
|
|
-32.9,1.1
|
|
-32.9,1.1
|
|
-32.9,1.1
|
|
-32.9,1.1
|
|
---- TYPES
|
|
DECIMAL, DECIMAL
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1888: FIRST_VALUE produces incorrect results when the window
|
|
# completely precedes all rows in a partition.
|
|
select id, date_string_col,
|
|
first_value(id) over (partition by date_string_col order by id
|
|
rows between 10 preceding and 3 preceding)
|
|
from alltypes where id < 15 order by id
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,'01/01/09',NULL
|
|
1,'01/01/09',NULL
|
|
2,'01/01/09',NULL
|
|
3,'01/01/09',0
|
|
4,'01/01/09',0
|
|
5,'01/01/09',0
|
|
6,'01/01/09',0
|
|
7,'01/01/09',0
|
|
8,'01/01/09',0
|
|
9,'01/01/09',0
|
|
10,'01/02/09',NULL
|
|
11,'01/02/09',NULL
|
|
12,'01/02/09',NULL
|
|
13,'01/02/09',10
|
|
14,'01/02/09',10
|
|
---- TYPES
|
|
INT, STRING, INT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1519: Check that the first analytic sort of a select block
|
|
# materializes TupleIsNullPredicates to be substituted in ancestor nodes.
|
|
select
|
|
sum(t1.id) over (partition by t1.bool_col),
|
|
count(1) over (order by t1.int_col),
|
|
avg(g) over (order by f),
|
|
t2.a,
|
|
t2.d
|
|
from functional.alltypestiny t1
|
|
left outer join
|
|
(select
|
|
id as a,
|
|
coalesce(id, 10) as b,
|
|
int_col as c,
|
|
coalesce(int_col, 20) as d,
|
|
bigint_col e,
|
|
coalesce(bigint_col, 30) as f,
|
|
coalesce(id + bigint_col, 40) as g
|
|
from functional.alltypestiny) t2
|
|
on (t1.id = t2.a + 100)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
12,4,NULL,NULL,NULL
|
|
12,4,NULL,NULL,NULL
|
|
12,4,NULL,NULL,NULL
|
|
12,4,NULL,NULL,NULL
|
|
16,8,NULL,NULL,NULL
|
|
16,8,NULL,NULL,NULL
|
|
16,8,NULL,NULL,NULL
|
|
16,8,NULL,NULL,NULL
|
|
---- TYPES
|
|
BIGINT, BIGINT, DOUBLE, INT, INT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1519: Check that the first analytic sort of a select block
|
|
# materializes TupleIsNullPredicates to be substituted in ancestor nodes.
|
|
# Same test as the one above, but with an aggregation on top.
|
|
select avg(af1), sum(af3), count(a)
|
|
from
|
|
(select
|
|
sum(t1.id) over (partition by t1.bool_col) af1,
|
|
count(1) over (order by t1.int_col) af2,
|
|
avg(g) over (order by f) af3,
|
|
t2.a,
|
|
t2.d
|
|
from functional.alltypestiny t1
|
|
left outer join
|
|
(select
|
|
id as a,
|
|
coalesce(id, 10) as b,
|
|
int_col as c,
|
|
coalesce(int_col, 20) as d,
|
|
bigint_col e,
|
|
coalesce(bigint_col, 30) as f,
|
|
coalesce(id + bigint_col, 40) as g
|
|
from functional.alltypestiny) t2
|
|
on (t1.id = t2.a + 100)) t3
|
|
group by d
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
14,NULL,0
|
|
---- TYPES
|
|
DOUBLE, DOUBLE, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1519: Check that expr wrapping with a TupleIsNullPredicate
|
|
# is performed correctly with analytics and multiple nesting levels.
|
|
# The test covers these cases:
|
|
# - analytic output that needs to be wrapped
|
|
# - analytic input that needs to be wrapped
|
|
# - both cases above with and without order by in the over() clause
|
|
# - exprs that need to be wrapped multiple times due to multiple outer joins
|
|
select * from
|
|
(select
|
|
a.id,
|
|
sum(x) over (partition by a.id) as x,
|
|
ifnull(y, 10) as y,
|
|
ifnull(z, "b") as z
|
|
from functional.alltypestiny a
|
|
left outer join
|
|
(select id,
|
|
ifnull(int_col, 1) x,
|
|
count(bigint_col) over(partition by id) y,
|
|
ifnull(string_col, "a") z
|
|
from functional.alltypestiny b) v1
|
|
on (a.id = v1.id)) v2
|
|
full outer join
|
|
(select
|
|
c.id,
|
|
sum(x) over (order by c.id) as x,
|
|
ifnull(y, 10) as y,
|
|
ifnull(z, "b") as z
|
|
from functional.alltypestiny c
|
|
left outer join
|
|
(select id,
|
|
ifnull(int_col, 1) x,
|
|
count(bigint_col) over(order by id) y,
|
|
ifnull(string_col, "a") z
|
|
from functional.alltypestiny d) v3
|
|
on (c.id = v3.id)) v4
|
|
on (v2.id = v4.id)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,0,1,'0',0,0,1,'0'
|
|
1,1,1,'1',1,1,2,'1'
|
|
2,0,1,'0',2,1,3,'0'
|
|
3,1,1,'1',3,2,4,'1'
|
|
4,0,1,'0',4,2,5,'0'
|
|
5,1,1,'1',5,3,6,'1'
|
|
6,0,1,'0',6,3,7,'0'
|
|
7,1,1,'1',7,4,8,'1'
|
|
---- TYPES
|
|
INT, BIGINT, BIGINT, STRING, INT, BIGINT, BIGINT, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1519: Check that expr wrapping with a TupleIsNullPredicate
|
|
# is performed correctly with analytics and multiple nesting levels.
|
|
# This is a tricky case from IMPALA-1518 which has a non-materialized
|
|
# analytic in a WITH-clause.
|
|
with w as
|
|
(select coalesce(-668, -335, count(smallint_col)) as int_col,
|
|
lead('670', 87) over
|
|
(order by coalesce(-668, -335, count(smallint_col)) desc) as char_col
|
|
from functional.alltypes)
|
|
select
|
|
lead(-410, 11) over
|
|
(order by (t2.int_col) + (t2.int_col) desc,
|
|
greatest(coalesce(t2.float_col, 16.51),
|
|
coalesce(t2.int_col, 997)) desc) as int_col,
|
|
(coalesce(177, -593, 723)) + (lag(227, 43) over
|
|
(order by (t2.int_col) + (t2.int_col) desc,
|
|
greatest(coalesce(t2.float_col, 16.51),
|
|
coalesce(t2.int_col, 997)) desc)) as int_col_2,
|
|
(t2.int_col) + (t2.int_col) as int_col_3,
|
|
greatest(coalesce(t2.float_col, 16.51), coalesce(t2.int_col, 997)) as float_col,
|
|
coalesce(t1.int_col, t1.int_col, t2.int_col) as int_col_4
|
|
from w t1
|
|
right outer join
|
|
(select (t1.month) + (t1.year) as int_col,
|
|
least(coalesce(t1.float_col, 683.98), -213.01) as float_col
|
|
from functional.alltypestiny t1
|
|
where (t1.int_col) not in (t1.smallint_col, t1.smallint_col)) t2
|
|
on (t2.int_col = t1.int_col and t2.int_col = t1.int_col and t2.int_col = t1.int_col)
|
|
---- RESULTS
|
|
---- TYPES
|
|
SMALLINT, INT, BIGINT, DOUBLE, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1946: Check that On-clause predicates of an outer join assigned in a scan
|
|
# are not wrapped in TupleIsNullPredicates.
|
|
select /* +straight_join */ a.id, b.id
|
|
from functional.alltypestiny a
|
|
left outer join
|
|
(select t1.id, ifnull(t1.int_col, 10) as int_col
|
|
from functional.alltypestiny t1
|
|
inner join functional.alltypestiny t2
|
|
on (t1.id = t2.id)) b
|
|
on (a.id = b.id and b.int_col < 10)
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,2
|
|
3,3
|
|
4,4
|
|
5,5
|
|
6,6
|
|
7,7
|
|
---- TYPES
|
|
INT, INT
|
|
====
|
|
---- QUERY
|
|
# Test percent_rank() with multiple partition exprs and multiple order by exprs and
|
|
# compare with the rewrite target query
|
|
select t.year, t.month, t.id, t.pr, (t.r-1)/(t.c-1)
|
|
from (select year, month, id, rank() over (partition by year, month order by id) as r,
|
|
count() over(partition by year, month) as c,
|
|
percent_rank() over(partition by year, month order by id) as pr
|
|
from functional.alltypes where tinyint_col < 2 and month < 3 and (id % 100 < 10)) as t
|
|
order by t.year, t.month, t.id;
|
|
---- RESULTS
|
|
2009,1,0,0,0
|
|
2009,1,1,0.1428571428571428,0.1428571428571428
|
|
2009,1,100,0.2857142857142857,0.2857142857142857
|
|
2009,1,101,0.4285714285714285,0.4285714285714285
|
|
2009,1,200,0.5714285714285714,0.5714285714285714
|
|
2009,1,201,0.7142857142857143,0.7142857142857143
|
|
2009,1,300,0.8571428571428571,0.8571428571428571
|
|
2009,1,301,1,1
|
|
2009,2,400,0,0
|
|
2009,2,401,0.3333333333333333,0.3333333333333333
|
|
2009,2,500,0.6666666666666666,0.6666666666666666
|
|
2009,2,501,1,1
|
|
2010,1,3700,0,0
|
|
2010,1,3701,0.2,0.2
|
|
2010,1,3800,0.4,0.4
|
|
2010,1,3801,0.6,0.6
|
|
2010,1,3900,0.8,0.8
|
|
2010,1,3901,1,1
|
|
2010,2,4000,0,0
|
|
2010,2,4001,0.2,0.2
|
|
2010,2,4100,0.4,0.4
|
|
2010,2,4101,0.6,0.6
|
|
2010,2,4200,0.8,0.8
|
|
2010,2,4201,1,1
|
|
---- TYPES
|
|
INT, INT, INT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Test cume_dist() with multiple partition exprs and multiple order by exprs
|
|
# and compare with the rewrite query
|
|
select t.year, t.month, t.id, t.cd, ((t.c-t.r)+1)/t.c
|
|
from (select year, month, id,
|
|
rank() over (partition by year, month order by id desc) as r,
|
|
count() over(partition by year, month) as c,
|
|
cume_dist() over(partition by year, month order by id) as cd
|
|
from functional.alltypes where tinyint_col < 2 and month < 3 and (id % 100 < 10)) as t
|
|
order by t.year, t.month, t.id;
|
|
---- RESULTS
|
|
2009,1,0,0.125,0.125
|
|
2009,1,1,0.25,0.25
|
|
2009,1,100,0.375,0.375
|
|
2009,1,101,0.5,0.5
|
|
2009,1,200,0.625,0.625
|
|
2009,1,201,0.75,0.75
|
|
2009,1,300,0.875,0.875
|
|
2009,1,301,1,1
|
|
2009,2,400,0.25,0.25
|
|
2009,2,401,0.5,0.5
|
|
2009,2,500,0.75,0.75
|
|
2009,2,501,1,1
|
|
2010,1,3700,0.1666666666666667,0.1666666666666667
|
|
2010,1,3701,0.3333333333333333,0.3333333333333333
|
|
2010,1,3800,0.5,0.5
|
|
2010,1,3801,0.6666666666666666,0.6666666666666666
|
|
2010,1,3900,0.8333333333333334,0.8333333333333334
|
|
2010,1,3901,1,1
|
|
2010,2,4000,0.1666666666666667,0.1666666666666667
|
|
2010,2,4001,0.3333333333333333,0.3333333333333333
|
|
2010,2,4100,0.5,0.5
|
|
2010,2,4101,0.6666666666666666,0.6666666666666666
|
|
2010,2,4200,0.8333333333333334,0.8333333333333334
|
|
2010,2,4201,1,1
|
|
---- TYPES
|
|
INT, INT, INT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Test ntile() with multiple partition exprs and multiple order by exprs and compare
|
|
# with the rewrite query
|
|
select t.year, t.month, t.id, t.nt, floor((if(t.c>3, 3, t.c)*(t.rownum-1))/t.c) + 1
|
|
from (select year, month, id,
|
|
row_number() over (partition by year, month order by id) as rownum,
|
|
count() over(partition by year, month) as c,
|
|
ntile(3) over(partition by year, month order by id) as nt
|
|
from functional.alltypes where tinyint_col < 2 and month < 3 and (id % 100 < 10)) as t
|
|
order by t.year, t.month, t.id;
|
|
---- RESULTS
|
|
2009,1,0,1,1
|
|
2009,1,1,1,1
|
|
2009,1,100,1,1
|
|
2009,1,101,2,2
|
|
2009,1,200,2,2
|
|
2009,1,201,2,2
|
|
2009,1,300,3,3
|
|
2009,1,301,3,3
|
|
2009,2,400,1,1
|
|
2009,2,401,1,1
|
|
2009,2,500,2,2
|
|
2009,2,501,3,3
|
|
2010,1,3700,1,1
|
|
2010,1,3701,1,1
|
|
2010,1,3800,2,2
|
|
2010,1,3801,2,2
|
|
2010,1,3900,3,3
|
|
2010,1,3901,3,3
|
|
2010,2,4000,1,1
|
|
2010,2,4001,1,1
|
|
2010,2,4100,2,2
|
|
2010,2,4101,2,2
|
|
2010,2,4200,3,3
|
|
2010,2,4201,3,3
|
|
---- TYPES
|
|
INT, INT, INT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test for ntile when the argument is larger than number of rows
|
|
select year, month, id, ntile(1000) over(order by year, month, id) from alltypes where
|
|
tinyint_col < 2 and month < 3 and (id % 100 < 10) order by year, month, id
|
|
---- RESULTS
|
|
2009,1,0,1
|
|
2009,1,1,2
|
|
2009,1,100,3
|
|
2009,1,101,4
|
|
2009,1,200,5
|
|
2009,1,201,6
|
|
2009,1,300,7
|
|
2009,1,301,8
|
|
2009,2,400,9
|
|
2009,2,401,10
|
|
2009,2,500,11
|
|
2009,2,501,12
|
|
2010,1,3700,13
|
|
2010,1,3701,14
|
|
2010,1,3800,15
|
|
2010,1,3801,16
|
|
2010,1,3900,17
|
|
2010,1,3901,18
|
|
2010,2,4000,19
|
|
2010,2,4001,20
|
|
2010,2,4100,21
|
|
2010,2,4101,22
|
|
2010,2,4200,23
|
|
2010,2,4201,24
|
|
---- TYPES
|
|
INT, INT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# percent_rank() and cume_dist() with inline views
|
|
select bool_col, a, b, a + b from
|
|
(select
|
|
bool_col,
|
|
percent_rank() over(order by id) a,
|
|
cume_dist() over(partition by bool_col order by id) b
|
|
from
|
|
(select * from functional.alltypes where id < 20) v1) v2
|
|
order by 1, 2, 3
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
false,0.05263157894736842,0.1,0.1526315789473684
|
|
false,0.1578947368421053,0.2,0.3578947368421053
|
|
false,0.2631578947368421,0.3,0.5631578947368421
|
|
false,0.3684210526315789,0.4,0.7684210526315789
|
|
false,0.4736842105263158,0.5,0.9736842105263157
|
|
false,0.5789473684210527,0.6,1.178947368421053
|
|
false,0.6842105263157895,0.7,1.38421052631579
|
|
false,0.7894736842105263,0.8,1.589473684210526
|
|
false,0.8947368421052632,0.9,1.794736842105263
|
|
false,1,1,2
|
|
true,0,0.1,0.1
|
|
true,0.1052631578947368,0.2,0.3052631578947368
|
|
true,0.2105263157894737,0.3,0.5105263157894737
|
|
true,0.3157894736842105,0.4,0.7157894736842105
|
|
true,0.4210526315789473,0.5,0.9210526315789473
|
|
true,0.5263157894736842,0.6,1.126315789473684
|
|
true,0.631578947368421,0.7,1.331578947368421
|
|
true,0.7368421052631579,0.8,1.536842105263158
|
|
true,0.8421052631578947,0.9,1.742105263157895
|
|
true,0.9473684210526315,1,1.947368421052631
|
|
---- TYPES
|
|
BOOLEAN, DOUBLE, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
select 1 a, cume_dist() over (order by int_col) as cd from alltypes where id < 5
|
|
union
|
|
distinct (select 2, percent_rank() over (partition by bool_col order by int_col)
|
|
from alltypesagg where id >= 5 and id <= 10) order by a, cd;
|
|
---- RESULTS
|
|
1,0.2
|
|
1,0.4
|
|
1,0.6
|
|
1,0.8
|
|
1,1
|
|
2,0
|
|
2,0.333333333333333
|
|
2,0.5
|
|
2,0.666666666666666
|
|
2,1
|
|
---- TYPES
|
|
TINYINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Test analytic function rewrite when the same expr appears twice
|
|
select id, percent_rank() over(order by id) from functional.alltypestiny
|
|
order by percent_rank() over (order by id)
|
|
---- RESULTS
|
|
0,0
|
|
1,0.1428571428571428
|
|
2,0.2857142857142857
|
|
3,0.4285714285714285
|
|
4,0.5714285714285714
|
|
5,0.7142857142857143
|
|
6,0.8571428571428571
|
|
7,1
|
|
---- TYPES
|
|
INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Tests IMPALA-2158. Check that analytic functions with a limit succeed.
|
|
select id, rank() over(order by id) from functional.alltypesagg limit 10
|
|
---- RESULTS
|
|
0,1
|
|
0,1
|
|
1,3
|
|
2,4
|
|
3,5
|
|
4,6
|
|
5,7
|
|
6,8
|
|
7,9
|
|
8,10
|
|
---- TYPES
|
|
INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-2829: MemPool mem transfer to the output rowbatch
|
|
# may result in a crash when the mem transfer occurs right before eos. This
|
|
# repro was crafted to hit the crash by playing with the window and row sizes
|
|
# so that memory allocated within the analytic node is just large enough (>8mb)
|
|
# to be transfered to the output row batch immediately before eos.
|
|
select max(t3.c1), max(t3.c2)
|
|
from (
|
|
select
|
|
avg( t1.timestamp_col )
|
|
over (order by t1.id, t2.id rows between 5000 following and 50000 following) c1,
|
|
avg( t2.timestamp_col )
|
|
over (order by t1.id, t2.id rows between 5000 following and 50000 following) c2
|
|
from alltypesagg t1 join alltypesagg t2 where t1.int_col = t2.int_col
|
|
) t3
|
|
---- RESULTS
|
|
2010-01-10 18:02:05.234931468,2010-01-10 18:02:05.215156078
|
|
---- TYPES
|
|
TIMESTAMP, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2457 - PERCENT_RANK() returns NaN for row group with 1 row.
|
|
select id, rank() over (partition by id order by tinyint_col),
|
|
count() over (partition by id), percent_rank() over (partition by id order by tinyint_col)
|
|
from functional.alltypestiny order by id
|
|
---- RESULTS
|
|
0,1,1,0
|
|
1,1,1,0
|
|
2,1,1,0
|
|
3,1,1,0
|
|
4,1,1,0
|
|
5,1,1,0
|
|
6,1,1,0
|
|
7,1,1,0
|
|
---- TYPES
|
|
int, bigint, bigint, double
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-2265, IMPALA-2559. The max_block_mgr_memory is tuned to
|
|
# reproduce the issue when running this query against functional_parquet.
|
|
SET max_block_mgr_memory=16m;
|
|
SELECT lag(-180, 13) over (ORDER BY t1.int_col ASC, t2.int_col ASC) AS int_col
|
|
FROM functional_parquet.alltypes t1 CROSS JOIN functional_parquet.alltypes t2 LIMIT 10;
|
|
---- CATCH
|
|
Memory limit exceeded
|
|
====
|
|
---- QUERY
|
|
# Check that the above query can succeed with the minimum buffers (3 buffers for sort,
|
|
# 1 buffer for analytic).
|
|
SET max_block_mgr_memory=32m;
|
|
SELECT lag(-180, 13) over (ORDER BY t1.int_col ASC, t2.int_col ASC) AS int_col
|
|
FROM functional_parquet.alltypes t1 CROSS JOIN functional_parquet.alltypes t2 LIMIT 10;
|
|
---- TYPES
|
|
SMALLINT
|
|
---- RESULTS
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2906: Tests that an analytic sort does not attempt to materialize
|
|
# a constant expr wrapped in a TupleIsNullPredicate that is not executable
|
|
# at the analytic sort. Here 'y' is wrapped in a TupleIsNullPredicate, and
|
|
# the analytic sort of the top-level block should not attempt to materialize
|
|
# that wrapped constant expr.
|
|
SELECT
|
|
LEAD(996, 13) OVER (ORDER BY t1.int_col)
|
|
FROM functional.alltypestiny t1
|
|
LEFT JOIN (
|
|
SELECT
|
|
v1.x,
|
|
(-560) IN (694, 44) AS y
|
|
FROM (
|
|
SELECT
|
|
LAG(bigint_col, 34) OVER (ORDER BY t2.int_col) AS x
|
|
FROM functional.alltypestiny t2
|
|
) v1
|
|
) v2
|
|
ON v2.x = t1.bigint_col AND v2.y = t1.bigint_col
|
|
GROUP BY t1.int_col
|
|
---- TYPES
|
|
SMALLINT
|
|
---- RESULTS
|
|
NULL
|
|
NULL
|
|
====
|
|
---- QUERY
|
|
# Start IGNORE NULLS
|
|
# Test all possible combinations of bound types for last_value that don't get rewritten.
|
|
select id, tinyint_col,
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between unbounded preceding and 1 preceding),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between unbounded preceding and current row),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between unbounded preceding and 1 following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between unbounded preceding and unbounded following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 2 preceding and 1 preceding),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 2 preceding and current row),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 1 preceding and 2 following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between current row and current row),
|
|
last_value(cast(tinyint_col as string) ignore nulls) over
|
|
(order by id rows between current row and 1 following),
|
|
last_value(cast(tinyint_col as string) ignore nulls) over
|
|
(order by id rows between 1 following and 2 following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id range between unbounded preceding and current row),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id range between unbounded preceding and unbounded following)
|
|
from functional.alltypesagg where id < 21
|
|
---- TYPES
|
|
INT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, STRING, STRING, TINYINT, TINYINT
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,NULL,NULL,NULL,NULL,9,NULL,NULL,1,NULL,'NULL','1',NULL,9
|
|
0,NULL,NULL,NULL,1,9,NULL,NULL,2,NULL,'1','2',NULL,9
|
|
1,1,NULL,1,2,9,NULL,1,3,1,'2','3',1,9
|
|
2,2,1,2,3,9,1,2,4,2,'3','4',2,9
|
|
3,3,2,3,4,9,2,3,5,3,'4','5',3,9
|
|
4,4,3,4,5,9,3,4,6,4,'5','6',4,9
|
|
5,5,4,5,6,9,4,5,7,5,'6','7',5,9
|
|
6,6,5,6,7,9,5,6,8,6,'7','8',6,9
|
|
7,7,6,7,8,9,6,7,9,7,'8','9',7,9
|
|
8,8,7,8,9,9,7,8,9,8,'9','9',8,9
|
|
9,9,8,9,9,9,8,9,9,9,'9','NULL',9,9
|
|
10,NULL,9,9,9,9,9,9,1,NULL,'NULL','1',9,9
|
|
10,NULL,9,9,1,9,9,9,2,NULL,'1','2',9,9
|
|
11,1,9,1,2,9,NULL,1,3,1,'2','3',1,9
|
|
12,2,1,2,3,9,1,2,4,2,'3','4',2,9
|
|
13,3,2,3,4,9,2,3,5,3,'4','5',3,9
|
|
14,4,3,4,5,9,3,4,6,4,'5','6',4,9
|
|
15,5,4,5,6,9,4,5,7,5,'6','7',5,9
|
|
16,6,5,6,7,9,5,6,8,6,'7','8',6,9
|
|
17,7,6,7,8,9,6,7,9,7,'8','9',7,9
|
|
18,8,7,8,9,9,7,8,9,8,'9','9',8,9
|
|
19,9,8,9,9,9,8,9,9,9,'9','NULL',9,9
|
|
20,NULL,9,9,9,9,9,9,9,NULL,'NULL','NULL',9,9
|
|
20,NULL,9,9,9,9,9,9,NULL,NULL,'NULL','NULL',9,9
|
|
====
|
|
---- QUERY
|
|
# Test the remaining combinations of bound types. These are handled separately from above
|
|
# because they are rewritten as first_value() functions, so their results can be returned
|
|
# in a different order, since there isn't a total ordering on id.
|
|
select id, tinyint_col,
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 1 preceding and unbounded following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between current row and unbounded following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 2 following and unbounded following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id range between current row and unbounded following)
|
|
from functional.alltypesagg where id < 21 order by id
|
|
---- TYPES
|
|
INT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,NULL,9,9,9,9
|
|
0,NULL,9,9,9,9
|
|
1,1,9,9,9,9
|
|
2,2,9,9,9,9
|
|
3,3,9,9,9,9
|
|
4,4,9,9,9,9
|
|
5,5,9,9,9,9
|
|
6,6,9,9,9,9
|
|
7,7,9,9,9,9
|
|
8,8,9,9,9,9
|
|
9,9,9,9,9,9
|
|
10,NULL,9,9,9,9
|
|
10,NULL,9,9,9,9
|
|
11,1,9,9,9,9
|
|
12,2,9,9,9,9
|
|
13,3,9,9,9,9
|
|
14,4,9,9,9,9
|
|
15,5,9,9,9,9
|
|
16,6,9,9,9,9
|
|
17,7,9,9,9,9
|
|
18,8,9,9,NULL,9
|
|
19,9,9,9,NULL,9
|
|
20,NULL,9,NULL,NULL,NULL
|
|
20,NULL,NULL,NULL,NULL,NULL
|
|
====
|
|
---- QUERY
|
|
# Test all combinations of bound types for first_value that don't get rewritten.
|
|
select id, tinyint_col,
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between unbounded preceding and 1 preceding),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between unbounded preceding and current row),
|
|
first_value(cast(tinyint_col as string) ignore nulls) over
|
|
(order by id rows between unbounded preceding and 1 following),
|
|
first_value(cast(tinyint_col as string) ignore nulls) over
|
|
(order by id rows between unbounded preceding and unbounded following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id range between unbounded preceding and current row),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id range between unbounded preceding and unbounded following)
|
|
from functional.alltypesagg where id < 21
|
|
---- TYPES
|
|
INT, TINYINT, TINYINT, TINYINT, STRING, STRING, TINYINT, TINYINT
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,NULL,NULL,NULL,'NULL','1',NULL,1
|
|
0,NULL,NULL,NULL,'1','1',NULL,1
|
|
1,1,NULL,1,'1','1',1,1
|
|
2,2,1,1,'1','1',1,1
|
|
3,3,1,1,'1','1',1,1
|
|
4,4,1,1,'1','1',1,1
|
|
5,5,1,1,'1','1',1,1
|
|
6,6,1,1,'1','1',1,1
|
|
7,7,1,1,'1','1',1,1
|
|
8,8,1,1,'1','1',1,1
|
|
9,9,1,1,'1','1',1,1
|
|
10,NULL,1,1,'1','1',1,1
|
|
10,NULL,1,1,'1','1',1,1
|
|
11,1,1,1,'1','1',1,1
|
|
12,2,1,1,'1','1',1,1
|
|
13,3,1,1,'1','1',1,1
|
|
14,4,1,1,'1','1',1,1
|
|
15,5,1,1,'1','1',1,1
|
|
16,6,1,1,'1','1',1,1
|
|
17,7,1,1,'1','1',1,1
|
|
18,8,1,1,'1','1',1,1
|
|
19,9,1,1,'1','1',1,1
|
|
20,NULL,1,1,'1','1',1,1
|
|
20,NULL,1,1,'1','1',1,1
|
|
====
|
|
---- QUERY
|
|
# Test combinations of bound types for first_value that get rewritten as last_value.
|
|
select id, tinyint_col,
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 2 preceding and 1 preceding),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 1 preceding and current row),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 1 preceding and 1 following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 1 preceding and unbounded following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between current row and current row),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between current row and 1 following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between current row and unbounded following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 1 following and 2 following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 2 following and unbounded following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id range between current row and unbounded following)
|
|
from functional.alltypesagg where id < 21
|
|
---- TYPES
|
|
INT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,NULL,NULL,NULL,NULL,1,NULL,NULL,1,1,1,1
|
|
0,NULL,NULL,NULL,1,1,NULL,1,1,1,2,1
|
|
1,1,NULL,1,1,1,1,1,1,2,3,1
|
|
2,2,1,1,1,1,2,2,2,3,4,2
|
|
3,3,1,2,2,2,3,3,3,4,5,3
|
|
4,4,2,3,3,3,4,4,4,5,6,4
|
|
5,5,3,4,4,4,5,5,5,6,7,5
|
|
6,6,4,5,5,5,6,6,6,7,8,6
|
|
7,7,5,6,6,6,7,7,7,8,9,7
|
|
8,8,6,7,7,7,8,8,8,9,1,8
|
|
9,9,7,8,8,8,9,9,9,NULL,1,9
|
|
10,NULL,8,9,9,9,NULL,NULL,1,1,1,1
|
|
10,NULL,9,NULL,1,1,NULL,1,1,1,2,1
|
|
11,1,NULL,1,1,1,1,1,1,2,3,1
|
|
12,2,1,1,1,1,2,2,2,3,4,2
|
|
13,3,1,2,2,2,3,3,3,4,5,3
|
|
14,4,2,3,3,3,4,4,4,5,6,4
|
|
15,5,3,4,4,4,5,5,5,6,7,5
|
|
16,6,4,5,5,5,6,6,6,7,8,6
|
|
17,7,5,6,6,6,7,7,7,8,9,7
|
|
18,8,6,7,7,7,8,8,8,9,NULL,8
|
|
19,9,7,8,8,8,9,9,9,NULL,NULL,9
|
|
20,NULL,8,9,9,9,NULL,NULL,NULL,NULL,NULL,NULL
|
|
20,NULL,9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL
|
|
====
|
|
---- QUERY
|
|
# Test all possible types for last_value (except decimal).
|
|
select id,
|
|
last_value(bool_col ignore nulls) over (order by id),
|
|
last_value(tinyint_col ignore nulls) over (order by id),
|
|
last_value(smallint_col ignore nulls) over (order by id),
|
|
last_value(int_col ignore nulls) over (order by id),
|
|
last_value(bigint_col ignore nulls) over (order by id),
|
|
last_value(float_col ignore nulls) over (order by id),
|
|
last_value(double_col ignore nulls) over (order by id),
|
|
last_value(string_col ignore nulls) over (order by id),
|
|
last_value(timestamp_col ignore nulls) over (order by id)
|
|
from functional.alltypesagg where id < 5
|
|
---- TYPES
|
|
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, TIMESTAMP
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
|
|
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
|
|
1,false,1,1,1,10,1.100000023841858,10.1,'1',2010-01-01 00:01:00
|
|
2,true,2,2,2,20,2.200000047683716,20.2,'2',2010-01-01 00:02:00.100000000
|
|
3,false,3,3,3,30,3.299999952316284,30.3,'3',2010-01-01 00:03:00.300000000
|
|
4,true,4,4,4,40,4.400000095367432,40.4,'4',2010-01-01 00:04:00.600000000
|
|
====
|
|
---- QUERY
|
|
# Test all possible types for first_value (except decimal).
|
|
select id,
|
|
first_value(bool_col ignore nulls) over (order by id),
|
|
first_value(tinyint_col ignore nulls) over (order by id),
|
|
first_value(smallint_col ignore nulls) over (order by id),
|
|
first_value(int_col ignore nulls) over (order by id),
|
|
first_value(bigint_col ignore nulls) over (order by id),
|
|
first_value(float_col ignore nulls) over (order by id),
|
|
first_value(double_col ignore nulls) over (order by id),
|
|
first_value(string_col ignore nulls) over (order by id),
|
|
first_value(timestamp_col ignore nulls) over (order by id)
|
|
from functional.alltypesagg where id < 5
|
|
---- TYPES
|
|
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, TIMESTAMP
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
|
|
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
|
|
1,true,1,1,1,10,1.100000023841858,10.1,'0',2010-01-01 00:00:00
|
|
2,true,1,1,1,10,1.100000023841858,10.1,'0',2010-01-01 00:00:00
|
|
3,true,1,1,1,10,1.100000023841858,10.1,'0',2010-01-01 00:00:00
|
|
4,true,1,1,1,10,1.100000023841858,10.1,'0',2010-01-01 00:00:00
|
|
====
|
|
---- QUERY
|
|
select
|
|
last_value(d1 ignore nulls) over (order by d1),
|
|
first_value(d1 ignore nulls) over (order by d1)
|
|
from functional.decimal_tbl
|
|
---- TYPES
|
|
DECIMAL, DECIMAL
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
1234,1234
|
|
2345,1234
|
|
12345,1234
|
|
12345,1234
|
|
132842,1234
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-4120: Invoke UDFs which do local allocations and verifies
|
|
# that the results are copied out.
|
|
select count(*) from (
|
|
select
|
|
from_unixtime(lead(bigint_col, 1) over (order by id), 'yyyyMMddHH:mm:ss') as a,
|
|
lead(from_unixtime(bigint_col, 'yyyyMMddHH:mm:ss'), 1) over (order by id) AS b
|
|
from functional.alltypes) x
|
|
where x.a = x.b
|
|
---- TYPES
|
|
BIGINT
|
|
---- RESULTS
|
|
7299
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-4518: Distinguishes between empty string and null string.
|
|
select f,lead(b,1,null) over (order by f)
|
|
from (select * from nulltable union all select * from nulltable) x;
|
|
---- TYPES
|
|
STRING, STRING
|
|
---- RESULTS
|
|
'a\x00b',''
|
|
'a\x00b','NULL'
|
|
====
|