Files
impala/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
Matthew Jacobs e8527ddb8e IMPALA-1888: FIRST_VALUE may produce incorrect results with preceding windows
Fixes a bug where FIRST_VALUE may produce incorrect results (or a DCHECK
failure in debug) when there is a window like "ROWS X PRECEDING Y PRECEDING",
such that X < Y and X > the size of a partition.

For windows with an end boundary that is PRECEDING (i.e.
the entire window is before a row), there is some special handling between
partitions, and the logic was not correct in some corner cases for FIRST_VALUE.

Change-Id: Ied5d440684e99dcaf60b47489c90300891f09b91
Reviewed-on: http://gerrit.cloudera.org:8080/236
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins
2015-03-20 14:37:19 -07:00

1149 lines
32 KiB
Plaintext

====
---- QUERY
select year, month,
count(int_col) over (partition by year, month),
avg(int_col) over (partition by year, month),
avg(timestamp_col) over (partition by year, month),
min(string_col) over (partition by year, month),
max(string_col) over (partition by year, month)
from alltypessmall
where id % 4 = 0 and month != 1;
---- RESULTS: VERIFY_IS_EQUAL_SORTED
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
---- TYPES
INT, INT, BIGINT, DOUBLE, TIMESTAMP, STRING, STRING
====
---- QUERY
select int_col,
count(int_col) over ()
from alltypessmall
where id % 4 = 0 and month = 1
order by int_col
---- RESULTS
0,7
0,7
2,7
4,7
4,7
6,7
8,7
---- TYPES
INT, BIGINT
====
---- QUERY
select tinyint_col,
count(int_col) over (order by tinyint_col)
from alltypessmall
where month = 1 and tinyint_col < 5
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,3
0,3
0,3
1,6
1,6
1,6
2,9
2,9
2,9
3,12
3,12
3,12
4,15
4,15
4,15
---- TYPES
TINYINT, BIGINT
====
---- QUERY
# Test ordering expressions having NULL values
select tinyint_col, smallint_col, int_col,
sum(smallint_col) over (order by tinyint_col nulls first)
from alltypesagg
where year = 2010 and month = 1 and day = 4 and id <= 3030 and bool_col = true
order by tinyint_col, id, smallint_col, int_col
---- RESULTS
2,2,2,96
2,12,12,96
2,22,22,96
4,4,4,138
4,14,14,138
4,24,24,138
6,6,6,186
6,16,16,186
6,26,26,186
8,8,8,240
8,18,18,240
8,28,28,240
NULL,NULL,NULL,60
NULL,10,10,60
NULL,20,20,60
NULL,30,30,60
---- TYPES
TINYINT, SMALLINT, INT, BIGINT
====
---- QUERY
# Test partitions and ordering expressions having NULL values for both
# the default window and ROWS windows.
select tinyint_col, smallint_col,
sum(smallint_col) over (partition by tinyint_col order by smallint_col),
sum(smallint_col) over (partition by tinyint_col order by smallint_col
rows between unbounded preceding and 1 following),
sum(smallint_col) over (partition by tinyint_col order by smallint_col
rows between 1 following and 2 following)
from alltypesagg
where year = 2010 and month = 1 and day = 4 and id <= 3200
and (smallint_col < 50 or smallint_col is NULL)
and (tinyint_col = 1 or tinyint_col is NULL)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,1,2,2,12
1,1,2,13,22
1,11,24,24,32
1,11,24,45,42
1,21,66,66,52
1,21,66,97,62
1,31,128,128,72
1,31,128,169,82
1,41,210,210,41
1,41,210,210,NULL
NULL,10,20,20,30
NULL,10,20,40,40
NULL,20,60,60,50
NULL,20,60,90,60
NULL,30,120,120,70
NULL,30,120,160,80
NULL,40,200,200,40
NULL,40,200,200,NULL
NULL,NULL,200,200,NULL
NULL,NULL,200,200,NULL
NULL,NULL,200,200,NULL
---- TYPES
TINYINT, SMALLINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# Test sum() NULL handling with different sliding windows (sum() has special NULL
# handling in Remove()).
select
sum(tinyint_col) over (order by id nulls last rows between 1 preceding and 1 preceding),
sum(tinyint_col) over (order by id nulls last rows between 1 preceding and current row),
sum(tinyint_col) over (order by id nulls last rows between current row and 1 following),
sum(tinyint_col) over (order by id nulls last rows between 1 following and 1 following)
from alltypesagg where id < 30 and (tinyint_col is null or tinyint_col < 6)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
NULL,NULL,NULL,NULL
NULL,NULL,1,1
NULL,1,3,2
1,3,5,3
2,5,7,4
3,7,9,5
4,9,5,NULL
5,5,NULL,NULL
NULL,NULL,1,1
NULL,1,3,2
1,3,5,3
2,5,7,4
3,7,9,5
4,9,5,NULL
5,5,NULL,NULL
NULL,NULL,1,1
NULL,1,3,2
1,3,5,3
2,5,7,4
3,7,9,5
4,9,5,NULL
---- TYPES
BIGINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# Test multiple partition exprs and multiple order by exprs
select year, month, bool_col, tinyint_col,
count(id) over (partition by year, month order by bool_col, tinyint_col)
from alltypes
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
order by year, month, bool_col, tinyint_col
---- RESULTS
2009,1,false,1,4
2009,1,false,1,4
2009,1,false,1,4
2009,1,false,1,4
2009,1,true,0,8
2009,1,true,0,8
2009,1,true,0,8
2009,1,true,0,8
2009,2,false,1,2
2009,2,false,1,2
2009,2,true,0,4
2009,2,true,0,4
2010,1,false,1,3
2010,1,false,1,3
2010,1,false,1,3
2010,1,true,0,6
2010,1,true,0,6
2010,1,true,0,6
2010,2,false,1,3
2010,2,false,1,3
2010,2,false,1,3
2010,2,true,0,6
2010,2,true,0,6
2010,2,true,0,6
---- TYPES
INT, INT, BOOLEAN, TINYINT, BIGINT
====
---- QUERY
# Same as above but with no ordering and a limit. Only count the number of results
# because they are non-deterministic.
select count(*) from
(select year, month, bool_col, tinyint_col,
count(id) over (partition by year, month order by bool_col, tinyint_col)
from alltypes
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
limit 7) v
---- RESULTS
7
---- TYPES
BIGINT
====
---- QUERY
# Simple ROWS window
select (month % 2), int_col,
sum(int_col) over (partition by (month % 2)
order by int_col rows between unbounded preceding and current row)
from alltypestiny
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,0,0
0,0,0
0,1,1
0,1,2
1,0,0
1,0,0
1,1,1
1,1,2
---- TYPES
INT, INT, BIGINT
====
---- QUERY
# ROWS windows with an end boundary offset, some before or after all results.
select
count(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 1 preceding),
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 1 following),
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 3 preceding),
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 20 following),
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 20 preceding)
from alltypesagg where id <= 10 and day = 1
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,4,NULL,25,NULL
1,9,NULL,25,NULL
2,16,NULL,25,NULL
3,25,1,25,NULL
4,25,4,25,NULL
0,6,NULL,20,NULL
1,12,NULL,20,NULL
2,20,NULL,20,NULL
3,20,2,20,NULL
4,20,6,20,NULL
4,20,12,20,NULL
---- TYPES
BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# multiple analytics in the same select block
select tinyint_col, int_col, bigint_col,
count(bigint_col)
over(partition by tinyint_col, bigint_col order by int_col desc
rows between unbounded preceding and 1 following),
max(tinyint_col)
over(partition by bigint_col, tinyint_col order by int_col desc
rows between unbounded preceding and 1 following),
# different window
min(int_col)
over(partition by bigint_col, tinyint_col order by int_col desc
rows between unbounded preceding and 2 following),
# different sort order but same partition
max(int_col)
over(partition by bigint_col, tinyint_col order by int_col asc
rows between unbounded preceding and 2 following),
# different partition
sum(int_col)
over(partition by tinyint_col order by int_col desc
rows between unbounded preceding and 2 following)
from functional.alltypesagg
where id < 20
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,1,10,1,1,1,1,12
1,11,110,1,1,11,11,12
2,2,20,1,2,2,2,14
2,12,120,1,2,12,12,14
3,3,30,1,3,3,3,16
3,13,130,1,3,13,13,16
4,4,40,1,4,4,4,18
4,14,140,1,4,14,14,18
5,5,50,1,5,5,5,20
5,15,150,1,5,15,15,20
6,6,60,1,6,6,6,22
6,16,160,1,6,16,16,22
7,7,70,1,7,7,7,24
7,17,170,1,7,17,17,24
8,8,80,1,8,8,8,26
8,18,180,1,8,18,18,26
9,9,90,1,9,9,9,28
9,19,190,1,9,19,19,28
NULL,10,100,2,NULL,10,10,20
NULL,10,100,2,NULL,10,10,20
NULL,NULL,NULL,0,NULL,NULL,NULL,10
NULL,NULL,NULL,0,NULL,NULL,NULL,20
---- TYPES
TINYINT, INT, BIGINT, BIGINT, TINYINT, INT, INT, BIGINT
====
---- QUERY
# analytics on a grouped aggregation with a final order by
select bool_col,
sum(min(int_col))
over(partition by min(tinyint_col) order by max(int_col)
rows between unbounded preceding and 1 following),
max(sum(bigint_col))
over(partition by min(tinyint_col) order by max(int_col)
rows between unbounded preceding and 1 following),
min(sum(bigint_col))
over(partition by min(tinyint_col) order by sum(int_col)
rows between unbounded preceding and 4 following)
from functional.alltypes
group by 1
order by 1, 2, 3
---- RESULTS
false,1,182500,182500
true,0,146000,146000
---- TYPES
BOOLEAN, BIGINT, BIGINT, BIGINT
====
---- QUERY
# Test for IMPALA-1200
select DISTINCT t1.c1 FROM
(SELECT MAX(smallint_col) OVER (PARTITION BY year ORDER BY id DESC) AS c1
FROM alltypesagg) t1
---- RESULTS
99
---- TYPES
SMALLINT
====
---- QUERY
# Test for IMPALA-1233
with with_1 AS (
SELECT RANK() OVER (ORDER BY t1.id) AS int_col_1
FROM alltypesagg t1)
SELECT COUNT(t1.int_col_1) AS int_col_1
FROM with_1 t1
WHERE t1.int_col_1 IS NULL
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
select tinyint_col, id,
row_number() over (order by tinyint_col, id) total_order,
row_number() over (partition by tinyint_col order by id) partitioned_order,
first_value(id) over (partition by tinyint_col order by id),
first_value(cast(id as string)) over (partition by tinyint_col order by id),
last_value(id) over (partition by tinyint_col order by id),
last_value(cast(id as string)) over (partition by tinyint_col order by id),
last_value(id) over (partition by tinyint_col order by id
rows between unbounded preceding and unbounded following)
from alltypes where id < 25 order by tinyint_col, id
---- RESULTS
0,0,1,1,0,'0',0,'0',20
0,10,2,2,0,'0',10,'10',20
0,20,3,3,0,'0',20,'20',20
1,1,4,1,1,'1',1,'1',21
1,11,5,2,1,'1',11,'11',21
1,21,6,3,1,'1',21,'21',21
2,2,7,1,2,'2',2,'2',22
2,12,8,2,2,'2',12,'12',22
2,22,9,3,2,'2',22,'22',22
3,3,10,1,3,'3',3,'3',23
3,13,11,2,3,'3',13,'13',23
3,23,12,3,3,'3',23,'23',23
4,4,13,1,4,'4',4,'4',24
4,14,14,2,4,'4',14,'14',24
4,24,15,3,4,'4',24,'24',24
5,5,16,1,5,'5',5,'5',15
5,15,17,2,5,'5',15,'15',15
6,6,18,1,6,'6',6,'6',16
6,16,19,2,6,'6',16,'16',16
7,7,20,1,7,'7',7,'7',17
7,17,21,2,7,'7',17,'17',17
8,8,22,1,8,'8',8,'8',18
8,18,23,2,8,'8',18,'18',18
9,9,24,1,9,'9',9,'9',19
9,19,25,2,9,'9',19,'19',19
---- TYPES
TINYINT, INT, BIGINT, BIGINT, INT, STRING, INT, STRING, INT
====
---- QUERY
select bool_col, id,
lag(id, 2, -12345) over (partition by bool_col order by id),
lead(id, 2, -12345) over (partition by bool_col order by id)
from alltypestiny
---- RESULTS: VERIFY_IS_EQUAL_SORTED
false,1,-12345,5
false,3,-12345,7
false,5,1,-12345
false,7,3,-12345
true,0,-12345,4
true,2,-12345,6
true,4,0,-12345
true,6,2,-12345
---- TYPES
BOOLEAN, INT, INT, INT
====
---- QUERY
# Tests rank() and dense_rank()
select year, month, bool_col, tinyint_col,
rank() over (partition by year, month order by bool_col, tinyint_col),
dense_rank() over (partition by year, month order by bool_col, tinyint_col)
from alltypes
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
order by year, month, bool_col, tinyint_col
---- RESULTS
2009,1,false,1,1,1
2009,1,false,1,1,1
2009,1,false,1,1,1
2009,1,false,1,1,1
2009,1,true,0,5,2
2009,1,true,0,5,2
2009,1,true,0,5,2
2009,1,true,0,5,2
2009,2,false,1,1,1
2009,2,false,1,1,1
2009,2,true,0,3,2
2009,2,true,0,3,2
2010,1,false,1,1,1
2010,1,false,1,1,1
2010,1,false,1,1,1
2010,1,true,0,4,2
2010,1,true,0,4,2
2010,1,true,0,4,2
2010,2,false,1,1,1
2010,2,false,1,1,1
2010,2,false,1,1,1
2010,2,true,0,4,2
2010,2,true,0,4,2
2010,2,true,0,4,2
---- TYPES
INT, INT, BOOLEAN, TINYINT, BIGINT, BIGINT
====
---- QUERY
# analytics with inline views
select bool_col, a, b, a+b from
(select
bool_col,
count(int_col) over() a,
sum(int_col + bigint_col) over(partition by bool_col) b
from
(select * from functional.alltypes where id < 20) v1) v2
order by 1, 2, 3
---- RESULTS: VERIFY_IS_EQUAL_SORTED
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
---- TYPES
BOOLEAN, BIGINT, BIGINT, BIGINT
====
---- QUERY
# analytics and unions
select 1 a, min(id) over (partition by int_col)
from functional.alltypes where id < 5
union distinct
select 2, max(id) over (partition by bool_col)
from functional.alltypessmall where id >= 5 and id <= 10
union all
(select 3, sum(bigint_col) over (partition by int_col order by id)
from functional.alltypestiny where bool_col = false)
order by 1, 2 desc nulls first
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,4
1,3
1,2
1,1
1,0
2,10
2,9
3,40
3,30
3,20
3,10
---- TYPES
TINYINT, BIGINT
====
---- QUERY
# analytics in an uncorrelated subquery
select id, int_col, bool_col from functional.alltypestiny t1
where int_col in
(select min(bigint_col) over(partition by bool_col)
from functional.alltypessmall t2 where t2.id < 10)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,0,true
2,0,true
4,0,true
6,0,true
---- TYPES
INT, INT, BOOLEAN
====
---- QUERY
# test conjunct evaluation in analytic nodes
select * from
(select bool_col, tinyint_col,
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 1 following) a
from alltypesagg where id <= 10 and day = 1) v
where
v.tinyint_col * 3 > v.a and
cast(v.a as boolean) = v.bool_col and
v.a is not null
---- RESULTS
true,8,20
---- TYPES
BOOLEAN, TINYINT, BIGINT
====
---- QUERY
# Test ROWS windows with start boundaries
select id,
count(id) over (order by id rows between 3 preceding and 3 preceding) c1,
count(id) over (order by id rows between 3 preceding and 2 preceding) c2,
count(id) over (order by id rows between 3 preceding and 1 preceding) c3,
count(id) over (order by id rows between 3 preceding and current row) c4,
count(id) over (order by id rows between 3 preceding and 1 following) c5,
count(id) over (order by id rows between 3 preceding and 2 following) c6,
count(id) over (order by id rows between 3 preceding and 3 following) c7,
count(id) over (order by id rows between 2 preceding and 3 following) c8,
count(id) over (order by id rows between 1 preceding and 3 following) c9,
count(id) over (order by id rows between current row and 3 following) c10,
count(id) over (order by id rows between 1 following and 3 following) c11,
count(id) over (order by id rows between 2 following and 3 following) c12,
count(id) over (order by id rows between 3 following and 3 following) c13
from alltypes where id < 8
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,0,0,0,1,2,3,4,4,4,4,3,2,1
1,0,0,1,2,3,4,5,5,5,4,3,2,1
2,0,1,2,3,4,5,6,6,5,4,3,2,1
3,1,2,3,4,5,6,7,6,5,4,3,2,1
4,1,2,3,4,5,6,7,6,5,4,3,2,1
5,1,2,3,4,5,6,6,5,4,3,2,1,0
6,1,2,3,4,5,5,5,4,3,2,1,0,0
7,1,2,3,4,4,4,4,3,2,1,0,0,0
---- TYPES
INT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# Test sum() and avg() removing values
select id,
sum(int_col) over (order by id rows between 1 preceding and 1 following),
sum(double_col) over (order by id rows between 3 preceding and 2 preceding),
avg(int_col) over (order by id rows between 1 preceding and 1 following),
avg(double_col) over (order by id rows between 3 preceding and 2 preceding),
avg(timestamp_col) over (order by id rows between 2 following and 3 following)
from alltypes where id < 8
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,1,NULL,0.5,NULL,2009-01-01 00:02:30.199999809
1,3,NULL,1,NULL,2009-01-01 00:03:30.449999809
2,6,0,2,0,2009-01-01 00:04:30.349999904
3,9,10.1,3,5.05,2009-01-01 00:05:30.124999999
4,12,30.3,4,15.15,2009-01-01 00:06:30.180000066
5,15,50.49999999999999,5,25.25,2009-01-01 00:07:00.210000038
6,18,70.69999999999999,6,35.34999999999999,NULL
7,13,90.89999999999999,6.5,45.45,NULL
---- TYPES
INT, BIGINT, DOUBLE, DOUBLE, DOUBLE, TIMESTAMP
====
---- QUERY
# More testing of start bounds. This exposed a bug in removing
# values from the window after the partition.
select tinyint_col, int_col,
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 2 following and 3 following),
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 2 following and 10 following),
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 6 following and 100 following)
from alltypesagg
where (tinyint_col is NULL or tinyint_col < 3) and id < 60;
---- RESULTS: VERIFY_IS_EQUAL_SORTED
NULL,10,40,280,180
NULL,10,50,260,140
NULL,20,60,240,100
NULL,20,70,210,50
NULL,30,80,180,NULL
NULL,30,90,140,NULL
NULL,40,100,100,NULL
NULL,40,50,50,NULL
NULL,50,NULL,NULL,NULL
NULL,50,NULL,NULL,NULL
NULL,NULL,NULL,NULL,NULL
NULL,NULL,NULL,NULL,NULL
1,1,52,144,NULL
1,11,72,123,NULL
1,21,92,92,NULL
1,31,51,51,NULL
1,41,NULL,NULL,NULL
1,51,NULL,NULL,NULL
2,2,54,148,NULL
2,12,74,126,NULL
2,22,94,94,NULL
2,32,52,52,NULL
2,42,NULL,NULL,NULL
2,52,NULL,NULL,NULL
---- TYPES
TINYINT, INT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# IMPALA-1273: Incorrect results with very large window and small table
select
count(int_col) over (order by bigint_col rows between 95 preceding and 40 following)
from alltypestiny
---- RESULTS
8
8
8
8
8
8
8
8
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 550 preceding and 550 following) c1
from alltypesagg) t1
---- RESULTS
551
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 1050 preceding and 2 following) c1
from alltypesagg) t1
---- RESULTS
1051
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 1050 preceding and 500 preceding) c1
from alltypesagg) t1
---- RESULTS
552
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 2 following and 1050 following) c1
from alltypesagg) t1
---- RESULTS
1050
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 500 following and 1050 following) c1
from alltypesagg) t1
---- RESULTS
552
---- TYPES
BIGINT
====
---- QUERY
select (tinyint_col % 4), id,
count(id) over (partition by (tinyint_col % 4) order by id
rows between 3 following and 10 following)
from alltypes where id < 20
order by (tinyint_col % 4), id
---- RESULTS
0,0,3
0,4,2
0,8,1
0,10,0
0,14,0
0,18,0
1,1,3
1,5,2
1,9,1
1,11,0
1,15,0
1,19,0
2,2,1
2,6,0
2,12,0
2,16,0
3,3,1
3,7,0
3,13,0
3,17,0
---- TYPES
TINYINT, INT, BIGINT
====
---- QUERY
# Test IMPALA-1269: ROWS windows with offset to current row might not remove
# correct values after new partitions
select sum(t2.int_col_1) from
(select SUM(t1.int_col) OVER (PARTITION BY t1.smallint_col ORDER BY t1.day, t1.int_col
ROWS 7 PRECEDING) AS int_col_1
FROM alltypesagg t1 ) t2
---- RESULTS
41790600
---- TYPES
BIGINT
====
---- QUERY
# Test FIRST_VALUE with strings and null handling
select cast(floor(id/3) as int), tinyint_col,
first_value(tinyint_col) over (partition by cast(floor(id/3) as int) order by tinyint_col nulls first),
first_value(cast(tinyint_col as string)) over (partition by cast(floor(id/3) as int) order by cast(tinyint_col as string) nulls first),
last_value(tinyint_col) over (partition by cast(floor(id/3) as int) order by tinyint_col nulls first),
last_value(cast(tinyint_col as string)) over (partition by cast(floor(id/3) as int) order by cast(tinyint_col as string) nulls first)
from alltypesagg where id < 50 and (tinyint_col is null or tinyint_col < 4)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,NULL,NULL,'NULL',NULL,'NULL'
0,NULL,NULL,'NULL',NULL,'NULL'
0,1,NULL,'NULL',1,'1'
0,2,NULL,'NULL',2,'2'
1,3,3,'3',3,'3'
3,NULL,NULL,'NULL',NULL,'NULL'
3,NULL,NULL,'NULL',NULL,'NULL'
3,1,NULL,'NULL',1,'1'
4,2,2,'2',2,'2'
4,3,2,'2',3,'3'
6,NULL,NULL,'NULL',NULL,'NULL'
6,NULL,NULL,'NULL',NULL,'NULL'
7,1,1,'1',1,'1'
7,2,1,'1',2,'2'
7,3,1,'1',3,'3'
10,NULL,NULL,'NULL',NULL,'NULL'
10,NULL,NULL,'NULL',NULL,'NULL'
10,1,NULL,'NULL',1,'1'
10,2,NULL,'NULL',2,'2'
11,3,3,'3',3,'3'
13,NULL,NULL,'NULL',NULL,'NULL'
13,NULL,NULL,'NULL',NULL,'NULL'
13,1,NULL,'NULL',1,'1'
14,2,2,'2',2,'2'
14,3,2,'2',3,'3'
---- TYPES
INT, TINYINT, TINYINT, STRING, TINYINT, STRING
====
---- QUERY
# IMPALA-1302: Incorrect result of FIRST_VALUE query.
select
first_value(id) over (order by id rows between 3 preceding and 3 following),
first_value(id) over (order by id rows between 3 preceding and 2 following),
first_value(id) over (order by id rows between 3 preceding and 1 following),
first_value(id) over (order by id rows between 3 preceding and current row),
first_value(id) over (order by id rows between 3 preceding and 1 preceding),
first_value(id) over (order by id rows between 3 preceding and 2 preceding),
first_value(id) over (order by id rows between 3 preceding and 3 preceding),
first_value(id) over (order by id rows between 2 preceding and 3 following),
first_value(id) over (order by id rows between 1 preceding and 3 following),
first_value(id) over (order by id rows between current row and 3 following),
first_value(id) over (order by id rows between 1 following and 3 following),
first_value(id) over (order by id rows between 2 following and 3 following),
first_value(id) over (order by id rows between 3 following and 3 following),
first_value(id) over (order by id rows between 15 preceding and 15 preceding),
first_value(id) over (order by id rows between 15 preceding and 15 following),
first_value(id) over (order by id rows between 15 preceding and 6 preceding),
first_value(id) over (order by id rows between 6 preceding and 15 following),
first_value(id) over (order by id rows between 15 following and 15 following),
first_value(id) over (order by id rows between unbounded preceding and 8 preceding),
last_value(id) over (order by id rows between 3 preceding and 3 following),
last_value(id) over (order by id rows between 3 preceding and 2 following),
last_value(id) over (order by id rows between 3 preceding and 1 following),
last_value(id) over (order by id rows between 3 preceding and current row),
last_value(id) over (order by id rows between 3 preceding and 1 preceding),
last_value(id) over (order by id rows between 3 preceding and 2 preceding),
last_value(id) over (order by id rows between 3 preceding and 3 preceding),
last_value(id) over (order by id rows between 2 preceding and 3 following),
last_value(id) over (order by id rows between 1 preceding and 3 following),
last_value(id) over (order by id rows between current row and 3 following),
last_value(id) over (order by id rows between 1 following and 3 following),
last_value(id) over (order by id rows between 2 following and 3 following),
last_value(id) over (order by id rows between 3 following and 3 following),
last_value(id) over (order by id rows between 15 preceding and 15 preceding),
last_value(id) over (order by id rows between 15 preceding and 15 following),
last_value(id) over (order by id rows between 15 preceding and 6 preceding),
last_value(id) over (order by id rows between 6 preceding and 15 following),
last_value(id) over (order by id rows between 15 following and 15 following),
last_value(id) over (order by id rows between 8 following and unbounded following)
from alltypes where id < 10 order by id;
---- RESULTS
0,0,0,0,NULL,NULL,NULL,0,0,0,1,2,3,NULL,0,NULL,0,NULL,NULL,3,2,1,0,NULL,NULL,NULL,3,3,3,3,3,3,NULL,9,NULL,9,NULL,9
0,0,0,0,0,NULL,NULL,0,0,1,2,3,4,NULL,0,NULL,0,NULL,NULL,4,3,2,1,0,NULL,NULL,4,4,4,4,4,4,NULL,9,NULL,9,NULL,9
0,0,0,0,0,0,NULL,0,1,2,3,4,5,NULL,0,NULL,0,NULL,NULL,5,4,3,2,1,0,NULL,5,5,5,5,5,5,NULL,9,NULL,9,NULL,NULL
0,0,0,0,0,0,0,1,2,3,4,5,6,NULL,0,NULL,0,NULL,NULL,6,5,4,3,2,1,0,6,6,6,6,6,6,NULL,9,NULL,9,NULL,NULL
1,1,1,1,1,1,1,2,3,4,5,6,7,NULL,0,NULL,0,NULL,NULL,7,6,5,4,3,2,1,7,7,7,7,7,7,NULL,9,NULL,9,NULL,NULL
2,2,2,2,2,2,2,3,4,5,6,7,8,NULL,0,NULL,0,NULL,NULL,8,7,6,5,4,3,2,8,8,8,8,8,8,NULL,9,NULL,9,NULL,NULL
3,3,3,3,3,3,3,4,5,6,7,8,9,NULL,0,0,0,NULL,NULL,9,8,7,6,5,4,3,9,9,9,9,9,9,NULL,9,0,9,NULL,NULL
4,4,4,4,4,4,4,5,6,7,8,9,NULL,NULL,0,0,1,NULL,NULL,9,9,8,7,6,5,4,9,9,9,9,9,NULL,NULL,9,1,9,NULL,NULL
5,5,5,5,5,5,5,6,7,8,9,NULL,NULL,NULL,0,0,2,NULL,0,9,9,9,8,7,6,5,9,9,9,9,NULL,NULL,NULL,9,2,9,NULL,NULL
6,6,6,6,6,6,6,7,8,9,NULL,NULL,NULL,NULL,0,0,3,NULL,0,9,9,9,9,8,7,6,9,9,9,NULL,NULL,NULL,NULL,9,3,9,NULL,NULL
---- TYPES
INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT
====
---- QUERY
# Test RANGE with UNBOUNDED FOLLOWING
select id,
sum(id) over (order by id range between unbounded preceding and unbounded following)
from alltypesagg where id <= 30 order by id;
---- RESULTS
0,525
0,525
1,525
2,525
3,525
4,525
5,525
6,525
7,525
8,525
9,525
10,525
10,525
11,525
12,525
13,525
14,525
15,525
16,525
17,525
18,525
19,525
20,525
20,525
21,525
22,525
23,525
24,525
25,525
26,525
27,525
28,525
29,525
30,525
30,525
---- TYPES
INT, BIGINT
====
---- QUERY
# IMPALA-1293: DCHECK failure with window ROWS BETWEEN UNBOUNDED PRECEDING
# AND <large> PRECEDING
select count(*), count(t1.c1) from
(select SUM(tinyint_col) OVER (PARTITION BY bigint_col ORDER BY tinyint_col ASC,
id ASC, timestamp_col ASC, day ASC
ROWS BETWEEN UNBOUNDED PRECEDING AND 11 PRECEDING) c1
FROM alltypesagg) t1
---- RESULTS
11000,0
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Test RANGE with UNBOUNDED FOLLOWING on many partitions
select count(distinct t1.c1) from
(select sum(id) over (partition by day order by tinyint_col
range between unbounded preceding and unbounded following) c1
from alltypesagg) t1;
---- RESULTS
11
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-1296: Fix DCHECK when unnecessary buffered tuple (for sum() analytic) not null
select
sum(month) over (),
dense_rank() over (order by date_string_col)
from alltypes where id < 5
---- RESULTS
5,1
5,1
5,1
5,1
5,1
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# IMPALA-1292: Incorrect result in analytic SUM when ORDER BY column is null
select tinyint_col, id,
SUM(id) OVER (ORDER BY tinyint_col ASC, id ASC)
FROM alltypesagg
where (tinyint_col is NULL or tinyint_col < 2) and id < 100 order by 1, 2
---- RESULTS
1,1,1
1,11,12
1,21,33
1,31,64
1,41,105
1,51,156
1,61,217
1,71,288
1,81,369
1,91,460
NULL,0,460
NULL,0,460
NULL,10,480
NULL,10,480
NULL,20,520
NULL,20,520
NULL,30,580
NULL,30,580
NULL,40,660
NULL,40,660
NULL,50,760
NULL,50,760
NULL,60,880
NULL,60,880
NULL,70,1020
NULL,70,1020
NULL,80,1180
NULL,80,1180
NULL,90,1360
NULL,90,1360
---- TYPES
TINYINT, INT, BIGINT
====
---- QUERY
# IMPALA-1280: Crash running analytic with LEFT SEMI JOIN
select sum(t1.int_col) over (partition by t1.id order by t1.int_col, t1.month)
from alltypestiny t1
where exists (select tt1.month from alltypes tt1 where t1.int_col = tt1.smallint_col)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0
0
0
1
0
1
1
1
---- TYPES
BIGINT
====
---- QUERY
# Regression test for IMPALA-1223. Query with analytic query with right join.
# Need proper handling of NULLs.
SELECT SUM(t1.int_col) OVER () AS int_col_1 FROM alltypestiny t1
RIGHT JOIN alltypestiny t2 ON t2.tinyint_col = t1.bigint_col AND t2.id = t1.month;
---- RESULTS
0
0
0
0
0
0
0
0
---- TYPES
BIGINT
====
---- QUERY
## IMPALA-1312: Fix DCHECK failure in BufferedTupleStream
## Disabled due to slow execution without codegen, breaks exhaustive runs (IMPALA-1414)
#SELECT
# SUM(t1.int_col * t1.id) OVER (
# ORDER BY t1.int_col * t1.id ASC, t3.year + t3.tinyint_col ASC ROWS
# BETWEEN 66 PRECEDING AND 21 FOLLOWING) AS int_col_1
#FROM alltypes t1
#INNER JOIN alltypes t2 ON t2.month = t1.smallint_col
#INNER JOIN alltypes t3 ON t3.bigint_col = t2.bigint_col
#WHERE
#t2.id <= t3.smallint_col + t2.tinyint_col
#order by 1
#limit 5
#---- TYPES
#BIGINT
#---- RESULTS
#22
#23
#24
#25
#26
#====
#----QUERY
# Testing crashes in buffered tuple stream related to IMPALA-1306.
SELECT AVG(t1.int_col) OVER () AS double_col_1
FROM alltypestiny t1
WHERE EXISTS
(SELECT t1.month AS int_col_1
FROM alltypestiny t1)
---- RESULTS
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
---- TYPES
DOUBLE
====
---- QUERY
# IMPALA-1808: handle partition/order by exprs with NaN
select id / int_col,
count(*) over (partition by id / int_col order by id),
count(*) over (order by id / int_col),
count(*) over (order by id / int_col rows between 100 preceding and 90 preceding),
count(*) over (order by id / int_col rows between 2 preceding and 1 preceding),
count(*) over (order by id / int_col rows between 1 preceding and 1 following),
count(*) over (order by id / int_col rows between 1 following and 2 following),
count(*) over (order by id / int_col rows between 90 following and 100 following)
from alltypestiny
---- RESULTS
NaN,1,1,0,0,2,2,0
1,1,2,0,1,3,2,0
3,1,3,0,2,3,2,0
5,1,4,0,2,3,2,0
7,1,5,0,2,3,2,0
Infinity,1,8,0,2,3,2,0
Infinity,2,8,0,2,3,1,0
Infinity,3,8,0,2,2,0,0
---- TYPES
DOUBLE, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
====
----QUERY
# IMPALA-1483: Check that TupleIsNullPredicates referring to the logical
# analytic output are substituted with TupleIsNullPredicates referring to
# the physical output.
select t2.a, t2.id, t2.int_col
from functional.alltypes t1
left outer join
(select sum(int_col) over(order by id) a, id, int_col is null as int_col
from functional.alltypes
where year = 2009 and month between 1 and 2) t2
on (t1.id = t2.id)
where year = 2009 and month = 1
order by t2.id limit 10
---- RESULTS
0,0,false
1,1,false
3,2,false
6,3,false
10,4,false
15,5,false
21,6,false
28,7,false
36,8,false
45,9,false
---- TYPES
BIGINT, INT, BOOLEAN
====
---- QUERY
# IMPALA-1542: Analytic fn containing arithmetic expr on NULL crashed
select lag(coalesce(505, 1 + NULL), 1) over (order by int_col desc)
from functional.alltypestiny
---- RESULTS
NULL
505
505
505
505
505
505
505
---- TYPES
SMALLINT
====
---- QUERY
# IMPALA-1562: AnalyticEvalNode not properly handling nullable tuple
select max(t3.c1) from
(select min(t2.string_col) over () c1
from alltypesagg t1
left join alltypes t2 on t2.string_col = t1.string_col ) t3
---- RESULTS
'0'
---- TYPES
STRING
====
---- QUERY
# IMPALA-1559: FIRST_VALUE rewrite function intermediate type not matching slot type
select
first_value(-32.9) over (order by id rows between 92 preceding and unbounded following),
first_value(1.1) over (order by id rows between 92 preceding and 1 preceding)
from alltypestiny
---- RESULTS
-32.9,NULL
-32.9,1.1
-32.9,1.1
-32.9,1.1
-32.9,1.1
-32.9,1.1
-32.9,1.1
-32.9,1.1
---- TYPES
DECIMAL, DECIMAL
====
---- QUERY
# IMPALA-1888: FIRST_VALUE produces incorrect results when the window
# completely precedes all rows in a partition.
select id, date_string_col,
first_value(id) over (partition by date_string_col order by id
rows between 10 preceding and 3 preceding)
from alltypes where id < 15 order by id
---- RESULTS
0,'01/01/09',NULL
1,'01/01/09',NULL
2,'01/01/09',NULL
3,'01/01/09',0
4,'01/01/09',0
5,'01/01/09',0
6,'01/01/09',0
7,'01/01/09',0
8,'01/01/09',0
9,'01/01/09',0
10,'01/02/09',NULL
11,'01/02/09',NULL
12,'01/02/09',NULL
13,'01/02/09',10
14,'01/02/09',10
---- TYPES
INT, STRING, INT
====