Files
impala/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
Matthew Jacobs 8b1b8f5780 IMPALA-1302: Incorrect result of FIRST_VALUE query
FIRST_VALUE with row offsets preceding did not produce the correct
results. This fix changes the rewrite for FIRST_VALUE and adds
additional handling for NULLs in the backend.

Change-Id: I03d54c05f63f46e9adb467008fa876ab33812c7b
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/4648
Reviewed-by: Matthew Jacobs <mj@cloudera.com>
Tested-by: jenkins
2014-10-06 15:12:03 -07:00

1027 lines
28 KiB
Plaintext

====
---- QUERY
select year, month,
count(int_col) over (partition by year, month),
avg(int_col) over (partition by year, month),
avg(timestamp_col) over (partition by year, month),
min(string_col) over (partition by year, month),
max(string_col) over (partition by year, month)
from alltypessmall
where id % 4 = 0 and month != 1;
---- RESULTS: VERIFY_IS_EQUAL_SORTED
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
---- TYPES
INT, INT, BIGINT, DOUBLE, TIMESTAMP, STRING, STRING
====
---- QUERY
select int_col,
count(int_col) over ()
from alltypessmall
where id % 4 = 0 and month = 1
order by int_col
---- RESULTS
0,7
0,7
2,7
4,7
4,7
6,7
8,7
---- TYPES
INT, BIGINT
====
---- QUERY
select tinyint_col,
count(int_col) over (order by tinyint_col)
from alltypessmall
where month = 1 and tinyint_col < 5
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,3
0,3
0,3
1,6
1,6
1,6
2,9
2,9
2,9
3,12
3,12
3,12
4,15
4,15
4,15
---- TYPES
TINYINT, BIGINT
====
---- QUERY
# Test ordering expressions having NULL values
select tinyint_col, smallint_col, int_col,
sum(smallint_col) over (order by tinyint_col nulls first)
from alltypesagg
where year = 2010 and month = 1 and day = 4 and id <= 3030 and bool_col = true
order by tinyint_col, id, smallint_col, int_col
---- RESULTS
2,2,2,96
2,12,12,96
2,22,22,96
4,4,4,138
4,14,14,138
4,24,24,138
6,6,6,186
6,16,16,186
6,26,26,186
8,8,8,240
8,18,18,240
8,28,28,240
NULL,NULL,NULL,60
NULL,10,10,60
NULL,20,20,60
NULL,30,30,60
---- TYPES
TINYINT, SMALLINT, INT, BIGINT
====
---- QUERY
# Test partitions and ordering expressions having NULL values for both
# the default window and ROWS windows.
select tinyint_col, smallint_col,
sum(smallint_col) over (partition by tinyint_col order by smallint_col),
sum(smallint_col) over (partition by tinyint_col order by smallint_col
rows between unbounded preceding and 1 following),
sum(smallint_col) over (partition by tinyint_col order by smallint_col
rows between 1 following and 2 following)
from alltypesagg
where year = 2010 and month = 1 and day = 4 and id <= 3200
and (smallint_col < 50 or smallint_col is NULL)
and (tinyint_col = 1 or tinyint_col is NULL)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,1,2,2,12
1,1,2,13,22
1,11,24,24,32
1,11,24,45,42
1,21,66,66,52
1,21,66,97,62
1,31,128,128,72
1,31,128,169,82
1,41,210,210,41
1,41,210,210,NULL
NULL,10,20,20,30
NULL,10,20,40,40
NULL,20,60,60,50
NULL,20,60,90,60
NULL,30,120,120,70
NULL,30,120,160,80
NULL,40,200,200,40
NULL,40,200,200,NULL
NULL,NULL,200,200,NULL
NULL,NULL,200,200,NULL
NULL,NULL,200,200,NULL
---- TYPES
TINYINT, SMALLINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# Test sum() NULL handling with different sliding windows (sum() has special NULL
# handling in Remove()).
select
sum(tinyint_col) over (order by id nulls last rows between 1 preceding and 1 preceding),
sum(tinyint_col) over (order by id nulls last rows between 1 preceding and current row),
sum(tinyint_col) over (order by id nulls last rows between current row and 1 following),
sum(tinyint_col) over (order by id nulls last rows between 1 following and 1 following)
from alltypesagg where id < 30 and (tinyint_col is null or tinyint_col < 6)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
NULL,NULL,NULL,NULL
NULL,NULL,1,1
NULL,1,3,2
1,3,5,3
2,5,7,4
3,7,9,5
4,9,5,NULL
5,5,NULL,NULL
NULL,NULL,1,1
NULL,1,3,2
1,3,5,3
2,5,7,4
3,7,9,5
4,9,5,NULL
5,5,NULL,NULL
NULL,NULL,1,1
NULL,1,3,2
1,3,5,3
2,5,7,4
3,7,9,5
4,9,5,NULL
---- TYPES
BIGINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# Test multiple partition exprs and multiple order by exprs
select year, month, bool_col, tinyint_col,
count(id) over (partition by year, month order by bool_col, tinyint_col)
from alltypes
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
order by year, month, bool_col, tinyint_col
---- RESULTS
2009,1,false,1,4
2009,1,false,1,4
2009,1,false,1,4
2009,1,false,1,4
2009,1,true,0,8
2009,1,true,0,8
2009,1,true,0,8
2009,1,true,0,8
2009,2,false,1,2
2009,2,false,1,2
2009,2,true,0,4
2009,2,true,0,4
2010,1,false,1,3
2010,1,false,1,3
2010,1,false,1,3
2010,1,true,0,6
2010,1,true,0,6
2010,1,true,0,6
2010,2,false,1,3
2010,2,false,1,3
2010,2,false,1,3
2010,2,true,0,6
2010,2,true,0,6
2010,2,true,0,6
---- TYPES
INT, INT, BOOLEAN, TINYINT, BIGINT
====
---- QUERY
# Same as above but with no ordering and a limit. Only count the number of results
# because they are non-deterministic.
select count(*) from
(select year, month, bool_col, tinyint_col,
count(id) over (partition by year, month order by bool_col, tinyint_col)
from alltypes
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
limit 7) v
---- RESULTS
7
---- TYPES
BIGINT
====
---- QUERY
# Simple ROWS window
select (month % 2), int_col,
sum(int_col) over (partition by (month % 2)
order by int_col rows between unbounded preceding and current row)
from alltypestiny
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,0,0
0,0,0
0,1,1
0,1,2
1,0,0
1,0,0
1,1,1
1,1,2
---- TYPES
INT, INT, BIGINT
====
---- QUERY
# ROWS windows with an end boundary offset, some before or after all results.
select
count(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 1 preceding),
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 1 following),
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 3 preceding),
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 20 following),
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 20 preceding)
from alltypesagg where id <= 10 and day = 1
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,4,NULL,25,NULL
1,9,NULL,25,NULL
2,16,NULL,25,NULL
3,25,1,25,NULL
4,25,4,25,NULL
0,6,NULL,20,NULL
1,12,NULL,20,NULL
2,20,NULL,20,NULL
3,20,2,20,NULL
4,20,6,20,NULL
4,20,12,20,NULL
---- TYPES
BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# multiple analytics in the same select block
select tinyint_col, int_col, bigint_col,
count(bigint_col)
over(partition by tinyint_col, bigint_col order by int_col desc
rows between unbounded preceding and 1 following),
max(tinyint_col)
over(partition by bigint_col, tinyint_col order by int_col desc
rows between unbounded preceding and 1 following),
# different window
min(int_col)
over(partition by bigint_col, tinyint_col order by int_col desc
rows between unbounded preceding and 2 following),
# different sort order but same partition
max(int_col)
over(partition by bigint_col, tinyint_col order by int_col asc
rows between unbounded preceding and 2 following),
# different partition
sum(int_col)
over(partition by tinyint_col order by int_col desc
rows between unbounded preceding and 2 following)
from functional.alltypesagg
where id < 20
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,1,10,1,1,1,1,12
1,11,110,1,1,11,11,12
2,2,20,1,2,2,2,14
2,12,120,1,2,12,12,14
3,3,30,1,3,3,3,16
3,13,130,1,3,13,13,16
4,4,40,1,4,4,4,18
4,14,140,1,4,14,14,18
5,5,50,1,5,5,5,20
5,15,150,1,5,15,15,20
6,6,60,1,6,6,6,22
6,16,160,1,6,16,16,22
7,7,70,1,7,7,7,24
7,17,170,1,7,17,17,24
8,8,80,1,8,8,8,26
8,18,180,1,8,18,18,26
9,9,90,1,9,9,9,28
9,19,190,1,9,19,19,28
NULL,10,100,2,NULL,10,10,20
NULL,10,100,2,NULL,10,10,20
NULL,NULL,NULL,0,NULL,NULL,NULL,10
NULL,NULL,NULL,0,NULL,NULL,NULL,20
---- TYPES
TINYINT, INT, BIGINT, BIGINT, TINYINT, INT, INT, BIGINT
====
---- QUERY
# analytics on a grouped aggregation with a final order by
select bool_col,
sum(min(int_col))
over(partition by min(tinyint_col) order by max(int_col)
rows between unbounded preceding and 1 following),
max(sum(bigint_col))
over(partition by min(tinyint_col) order by max(int_col)
rows between unbounded preceding and 1 following),
min(sum(bigint_col))
over(partition by min(tinyint_col) order by sum(int_col)
rows between unbounded preceding and 4 following)
from functional.alltypes
group by 1
order by 1, 2, 3
---- RESULTS
false,1,182500,182500
true,0,146000,146000
---- TYPES
BOOLEAN, BIGINT, BIGINT, BIGINT
====
---- QUERY
# Test for IMPALA-1200
select DISTINCT t1.c1 FROM
(SELECT MAX(smallint_col) OVER (PARTITION BY year ORDER BY id DESC) AS c1
FROM alltypesagg) t1
---- RESULTS
99
---- TYPES
SMALLINT
====
---- QUERY
# Test for IMPALA-1233
with with_1 AS (
SELECT RANK() OVER (ORDER BY t1.id) AS int_col_1
FROM alltypesagg t1)
SELECT COUNT(t1.int_col_1) AS int_col_1
FROM with_1 t1
WHERE t1.int_col_1 IS NULL
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
select tinyint_col, id,
row_number() over (order by tinyint_col, id) total_order,
row_number() over (partition by tinyint_col order by id) partitioned_order,
first_value(id) over (partition by tinyint_col order by id),
first_value(cast(id as string)) over (partition by tinyint_col order by id),
last_value(id) over (partition by tinyint_col order by id),
last_value(cast(id as string)) over (partition by tinyint_col order by id),
last_value(id) over (partition by tinyint_col order by id
rows between unbounded preceding and unbounded following)
from alltypes where id < 25 order by tinyint_col, id
---- RESULTS
0,0,1,1,0,'0',0,'0',20
0,10,2,2,0,'0',10,'10',20
0,20,3,3,0,'0',20,'20',20
1,1,4,1,1,'1',1,'1',21
1,11,5,2,1,'1',11,'11',21
1,21,6,3,1,'1',21,'21',21
2,2,7,1,2,'2',2,'2',22
2,12,8,2,2,'2',12,'12',22
2,22,9,3,2,'2',22,'22',22
3,3,10,1,3,'3',3,'3',23
3,13,11,2,3,'3',13,'13',23
3,23,12,3,3,'3',23,'23',23
4,4,13,1,4,'4',4,'4',24
4,14,14,2,4,'4',14,'14',24
4,24,15,3,4,'4',24,'24',24
5,5,16,1,5,'5',5,'5',15
5,15,17,2,5,'5',15,'15',15
6,6,18,1,6,'6',6,'6',16
6,16,19,2,6,'6',16,'16',16
7,7,20,1,7,'7',7,'7',17
7,17,21,2,7,'7',17,'17',17
8,8,22,1,8,'8',8,'8',18
8,18,23,2,8,'8',18,'18',18
9,9,24,1,9,'9',9,'9',19
9,19,25,2,9,'9',19,'19',19
---- TYPES
TINYINT, INT, BIGINT, BIGINT, INT, STRING, INT, STRING, INT
====
---- QUERY
select bool_col, id,
lag(id, 2, -12345) over (partition by bool_col order by id),
lead(id, 2, -12345) over (partition by bool_col order by id)
from alltypestiny
---- RESULTS: VERIFY_IS_EQUAL_SORTED
false,1,-12345,5
false,3,-12345,7
false,5,1,-12345
false,7,3,-12345
true,0,-12345,4
true,2,-12345,6
true,4,0,-12345
true,6,2,-12345
---- TYPES
BOOLEAN, INT, INT, INT
====
---- QUERY
# Tests rank() and dense_rank()
select year, month, bool_col, tinyint_col,
rank() over (partition by year, month order by bool_col, tinyint_col),
dense_rank() over (partition by year, month order by bool_col, tinyint_col)
from alltypes
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
order by year, month, bool_col, tinyint_col
---- RESULTS
2009,1,false,1,1,1
2009,1,false,1,1,1
2009,1,false,1,1,1
2009,1,false,1,1,1
2009,1,true,0,5,2
2009,1,true,0,5,2
2009,1,true,0,5,2
2009,1,true,0,5,2
2009,2,false,1,1,1
2009,2,false,1,1,1
2009,2,true,0,3,2
2009,2,true,0,3,2
2010,1,false,1,1,1
2010,1,false,1,1,1
2010,1,false,1,1,1
2010,1,true,0,4,2
2010,1,true,0,4,2
2010,1,true,0,4,2
2010,2,false,1,1,1
2010,2,false,1,1,1
2010,2,false,1,1,1
2010,2,true,0,4,2
2010,2,true,0,4,2
2010,2,true,0,4,2
---- TYPES
INT, INT, BOOLEAN, TINYINT, BIGINT, BIGINT
====
---- QUERY
# analytics with inline views
select bool_col, a, b, a+b from
(select
bool_col,
count(int_col) over() a,
sum(int_col + bigint_col) over(partition by bool_col) b
from
(select * from functional.alltypes where id < 20) v1) v2
order by 1, 2, 3
---- RESULTS: VERIFY_IS_EQUAL_SORTED
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
---- TYPES
BOOLEAN, BIGINT, BIGINT, BIGINT
====
---- QUERY
# analytics and unions
select 1 a, min(id) over (partition by int_col)
from functional.alltypes where id < 5
union distinct
select 2, max(id) over (partition by bool_col)
from functional.alltypessmall where id >= 5 and id <= 10
union all
(select 3, sum(bigint_col) over (partition by int_col order by id)
from functional.alltypestiny where bool_col = false)
order by 1, 2 desc nulls first
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,4
1,3
1,2
1,1
1,0
2,10
2,9
3,40
3,30
3,20
3,10
---- TYPES
TINYINT, BIGINT
====
---- QUERY
# analytics in an uncorrelated subquery
select id, int_col, bool_col from functional.alltypestiny t1
where int_col in
(select min(bigint_col) over(partition by bool_col)
from functional.alltypessmall t2 where t2.id < 10)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,0,true
2,0,true
4,0,true
6,0,true
---- TYPES
INT, INT, BOOLEAN
====
---- QUERY
# test conjunct evaluation in analytic nodes
select * from
(select bool_col, tinyint_col,
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 1 following) a
from alltypesagg where id <= 10 and day = 1) v
where
v.tinyint_col * 3 > v.a and
cast(v.a as boolean) = v.bool_col and
v.a is not null
---- RESULTS
true,8,20
---- TYPES
BOOLEAN, TINYINT, BIGINT
====
---- QUERY
# Test ROWS windows with start boundaries
select id,
count(id) over (order by id rows between 3 preceding and 3 preceding) c1,
count(id) over (order by id rows between 3 preceding and 2 preceding) c2,
count(id) over (order by id rows between 3 preceding and 1 preceding) c3,
count(id) over (order by id rows between 3 preceding and current row) c4,
count(id) over (order by id rows between 3 preceding and 1 following) c5,
count(id) over (order by id rows between 3 preceding and 2 following) c6,
count(id) over (order by id rows between 3 preceding and 3 following) c7,
count(id) over (order by id rows between 2 preceding and 3 following) c8,
count(id) over (order by id rows between 1 preceding and 3 following) c9,
count(id) over (order by id rows between current row and 3 following) c10,
count(id) over (order by id rows between 1 following and 3 following) c11,
count(id) over (order by id rows between 2 following and 3 following) c12,
count(id) over (order by id rows between 3 following and 3 following) c13
from alltypes where id < 8
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,0,0,0,1,2,3,4,4,4,4,3,2,1
1,0,0,1,2,3,4,5,5,5,4,3,2,1
2,0,1,2,3,4,5,6,6,5,4,3,2,1
3,1,2,3,4,5,6,7,6,5,4,3,2,1
4,1,2,3,4,5,6,7,6,5,4,3,2,1
5,1,2,3,4,5,6,6,5,4,3,2,1,0
6,1,2,3,4,5,5,5,4,3,2,1,0,0
7,1,2,3,4,4,4,4,3,2,1,0,0,0
---- TYPES
INT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# Test sum() and avg() removing values
select id,
sum(int_col) over (order by id rows between 1 preceding and 1 following),
sum(double_col) over (order by id rows between 3 preceding and 2 preceding),
avg(int_col) over (order by id rows between 1 preceding and 1 following),
avg(double_col) over (order by id rows between 3 preceding and 2 preceding),
avg(timestamp_col) over (order by id rows between 2 following and 3 following)
from alltypes where id < 8
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,1,NULL,0.5,NULL,2009-01-01 00:02:30.199999809
1,3,NULL,1,NULL,2009-01-01 00:03:30.449999809
2,6,0,2,0,2009-01-01 00:04:30.349999904
3,9,10.1,3,5.05,2009-01-01 00:05:30.124999999
4,12,30.3,4,15.15,2009-01-01 00:06:30.180000066
5,15,50.49999999999999,5,25.25,2009-01-01 00:07:00.210000038
6,18,70.69999999999999,6,35.34999999999999,NULL
7,13,90.89999999999999,6.5,45.45,NULL
---- TYPES
INT, BIGINT, DOUBLE, DOUBLE, DOUBLE, TIMESTAMP
====
---- QUERY
# More testing of start bounds. This exposed a bug in removing
# values from the window after the partition.
select tinyint_col, int_col,
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 2 following and 3 following),
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 2 following and 10 following),
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 6 following and 100 following)
from alltypesagg
where (tinyint_col is NULL or tinyint_col < 3) and id < 60;
---- RESULTS: VERIFY_IS_EQUAL_SORTED
NULL,10,40,280,180
NULL,10,50,260,140
NULL,20,60,240,100
NULL,20,70,210,50
NULL,30,80,180,NULL
NULL,30,90,140,NULL
NULL,40,100,100,NULL
NULL,40,50,50,NULL
NULL,50,NULL,NULL,NULL
NULL,50,NULL,NULL,NULL
NULL,NULL,NULL,NULL,NULL
NULL,NULL,NULL,NULL,NULL
1,1,52,144,NULL
1,11,72,123,NULL
1,21,92,92,NULL
1,31,51,51,NULL
1,41,NULL,NULL,NULL
1,51,NULL,NULL,NULL
2,2,54,148,NULL
2,12,74,126,NULL
2,22,94,94,NULL
2,32,52,52,NULL
2,42,NULL,NULL,NULL
2,52,NULL,NULL,NULL
---- TYPES
TINYINT, INT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# IMPALA-1273: Incorrect results with very large window and small table
select
count(int_col) over (order by bigint_col rows between 95 preceding and 40 following)
from alltypestiny
---- RESULTS
8
8
8
8
8
8
8
8
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 550 preceding and 550 following) c1
from alltypesagg) t1
---- RESULTS
551
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 1050 preceding and 2 following) c1
from alltypesagg) t1
---- RESULTS
1051
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 1050 preceding and 500 preceding) c1
from alltypesagg) t1
---- RESULTS
552
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 2 following and 1050 following) c1
from alltypesagg) t1
---- RESULTS
1050
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 500 following and 1050 following) c1
from alltypesagg) t1
---- RESULTS
552
---- TYPES
BIGINT
====
---- QUERY
select (tinyint_col % 4), id,
count(id) over (partition by (tinyint_col % 4) order by id
rows between 3 following and 10 following)
from alltypes where id < 20
order by (tinyint_col % 4), id
---- RESULTS
0,0,3
0,4,2
0,8,1
0,10,0
0,14,0
0,18,0
1,1,3
1,5,2
1,9,1
1,11,0
1,15,0
1,19,0
2,2,1
2,6,0
2,12,0
2,16,0
3,3,1
3,7,0
3,13,0
3,17,0
---- TYPES
TINYINT, INT, BIGINT
====
---- QUERY
# Test IMPALA-1269: ROWS windows with offset to current row might not remove
# correct values after new partitions
select sum(t2.int_col_1) from
(select SUM(t1.int_col) OVER (PARTITION BY t1.smallint_col ORDER BY t1.day, t1.int_col
ROWS 7 PRECEDING) AS int_col_1
FROM alltypesagg t1 ) t2
---- RESULTS
41790600
---- TYPES
BIGINT
====
---- QUERY
# Test FIRST_VALUE with strings and null handling
select cast(floor(id/3) as int), tinyint_col,
first_value(tinyint_col) over (partition by cast(floor(id/3) as int) order by tinyint_col nulls first),
first_value(cast(tinyint_col as string)) over (partition by cast(floor(id/3) as int) order by cast(tinyint_col as string) nulls first),
last_value(tinyint_col) over (partition by cast(floor(id/3) as int) order by tinyint_col nulls first),
last_value(cast(tinyint_col as string)) over (partition by cast(floor(id/3) as int) order by cast(tinyint_col as string) nulls first)
from alltypesagg where id < 50 and (tinyint_col is null or tinyint_col < 4)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,NULL,NULL,'NULL',NULL,'NULL'
0,NULL,NULL,'NULL',NULL,'NULL'
0,1,NULL,'NULL',1,'1'
0,2,NULL,'NULL',2,'2'
1,3,3,'3',3,'3'
3,NULL,NULL,'NULL',NULL,'NULL'
3,NULL,NULL,'NULL',NULL,'NULL'
3,1,NULL,'NULL',1,'1'
4,2,2,'2',2,'2'
4,3,2,'2',3,'3'
6,NULL,NULL,'NULL',NULL,'NULL'
6,NULL,NULL,'NULL',NULL,'NULL'
7,1,1,'1',1,'1'
7,2,1,'1',2,'2'
7,3,1,'1',3,'3'
10,NULL,NULL,'NULL',NULL,'NULL'
10,NULL,NULL,'NULL',NULL,'NULL'
10,1,NULL,'NULL',1,'1'
10,2,NULL,'NULL',2,'2'
11,3,3,'3',3,'3'
13,NULL,NULL,'NULL',NULL,'NULL'
13,NULL,NULL,'NULL',NULL,'NULL'
13,1,NULL,'NULL',1,'1'
14,2,2,'2',2,'2'
14,3,2,'2',3,'3'
---- TYPES
INT, TINYINT, TINYINT, STRING, TINYINT, STRING
====
---- QUERY
# IMPALA-1302: Incorrect result of FIRST_VALUE query.
select
first_value(id) over (order by id rows between 3 preceding and 3 following),
first_value(id) over (order by id rows between 3 preceding and 2 following),
first_value(id) over (order by id rows between 3 preceding and 1 following),
first_value(id) over (order by id rows between 3 preceding and current row),
first_value(id) over (order by id rows between 3 preceding and 1 preceding),
first_value(id) over (order by id rows between 3 preceding and 2 preceding),
first_value(id) over (order by id rows between 3 preceding and 3 preceding),
first_value(id) over (order by id rows between 2 preceding and 3 following),
first_value(id) over (order by id rows between 1 preceding and 3 following),
first_value(id) over (order by id rows between current row and 3 following),
first_value(id) over (order by id rows between 1 following and 3 following),
first_value(id) over (order by id rows between 2 following and 3 following),
first_value(id) over (order by id rows between 3 following and 3 following),
first_value(id) over (order by id rows between 15 preceding and 15 preceding),
first_value(id) over (order by id rows between 15 preceding and 15 following),
first_value(id) over (order by id rows between 15 preceding and 6 preceding),
first_value(id) over (order by id rows between 6 preceding and 15 following),
first_value(id) over (order by id rows between 15 following and 15 following),
first_value(id) over (order by id rows between unbounded preceding and 8 preceding),
last_value(id) over (order by id rows between 3 preceding and 3 following),
last_value(id) over (order by id rows between 3 preceding and 2 following),
last_value(id) over (order by id rows between 3 preceding and 1 following),
last_value(id) over (order by id rows between 3 preceding and current row),
last_value(id) over (order by id rows between 3 preceding and 1 preceding),
last_value(id) over (order by id rows between 3 preceding and 2 preceding),
last_value(id) over (order by id rows between 3 preceding and 3 preceding),
last_value(id) over (order by id rows between 2 preceding and 3 following),
last_value(id) over (order by id rows between 1 preceding and 3 following),
last_value(id) over (order by id rows between current row and 3 following),
last_value(id) over (order by id rows between 1 following and 3 following),
last_value(id) over (order by id rows between 2 following and 3 following),
last_value(id) over (order by id rows between 3 following and 3 following),
last_value(id) over (order by id rows between 15 preceding and 15 preceding),
last_value(id) over (order by id rows between 15 preceding and 15 following),
last_value(id) over (order by id rows between 15 preceding and 6 preceding),
last_value(id) over (order by id rows between 6 preceding and 15 following),
last_value(id) over (order by id rows between 15 following and 15 following),
last_value(id) over (order by id rows between 8 following and unbounded following)
from alltypes where id < 10 order by id;
---- RESULTS
0,0,0,0,NULL,NULL,NULL,0,0,0,1,2,3,NULL,0,NULL,0,NULL,NULL,3,2,1,0,NULL,NULL,NULL,3,3,3,3,3,3,NULL,9,NULL,9,NULL,9
0,0,0,0,0,NULL,NULL,0,0,1,2,3,4,NULL,0,NULL,0,NULL,NULL,4,3,2,1,0,NULL,NULL,4,4,4,4,4,4,NULL,9,NULL,9,NULL,9
0,0,0,0,0,0,NULL,0,1,2,3,4,5,NULL,0,NULL,0,NULL,NULL,5,4,3,2,1,0,NULL,5,5,5,5,5,5,NULL,9,NULL,9,NULL,NULL
0,0,0,0,0,0,0,1,2,3,4,5,6,NULL,0,NULL,0,NULL,NULL,6,5,4,3,2,1,0,6,6,6,6,6,6,NULL,9,NULL,9,NULL,NULL
1,1,1,1,1,1,1,2,3,4,5,6,7,NULL,0,NULL,0,NULL,NULL,7,6,5,4,3,2,1,7,7,7,7,7,7,NULL,9,NULL,9,NULL,NULL
2,2,2,2,2,2,2,3,4,5,6,7,8,NULL,0,NULL,0,NULL,NULL,8,7,6,5,4,3,2,8,8,8,8,8,8,NULL,9,NULL,9,NULL,NULL
3,3,3,3,3,3,3,4,5,6,7,8,9,NULL,0,0,0,NULL,NULL,9,8,7,6,5,4,3,9,9,9,9,9,9,NULL,9,0,9,NULL,NULL
4,4,4,4,4,4,4,5,6,7,8,9,NULL,NULL,0,0,1,NULL,NULL,9,9,8,7,6,5,4,9,9,9,9,9,NULL,NULL,9,1,9,NULL,NULL
5,5,5,5,5,5,5,6,7,8,9,NULL,NULL,NULL,0,0,2,NULL,0,9,9,9,8,7,6,5,9,9,9,9,NULL,NULL,NULL,9,2,9,NULL,NULL
6,6,6,6,6,6,6,7,8,9,NULL,NULL,NULL,NULL,0,0,3,NULL,0,9,9,9,9,8,7,6,9,9,9,NULL,NULL,NULL,NULL,9,3,9,NULL,NULL
---- TYPES
INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT
====
---- QUERY
# Test RANGE with UNBOUNDED FOLLOWING
select id,
sum(id) over (order by id range between unbounded preceding and unbounded following)
from alltypesagg where id <= 30 order by id;
---- RESULTS
0,525
0,525
1,525
2,525
3,525
4,525
5,525
6,525
7,525
8,525
9,525
10,525
10,525
11,525
12,525
13,525
14,525
15,525
16,525
17,525
18,525
19,525
20,525
20,525
21,525
22,525
23,525
24,525
25,525
26,525
27,525
28,525
29,525
30,525
30,525
---- TYPES
INT, BIGINT
====
---- QUERY
# IMPALA-1293: DCHECK failure with window ROWS BETWEEN UNBOUNDED PRECEDING
# AND <large> PRECEDING
select count(*), count(t1.c1) from
(select SUM(tinyint_col) OVER (PARTITION BY bigint_col ORDER BY tinyint_col ASC,
id ASC, timestamp_col ASC, day ASC
ROWS BETWEEN UNBOUNDED PRECEDING AND 11 PRECEDING) c1
FROM alltypesagg) t1
---- RESULTS
11000,0
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Test RANGE with UNBOUNDED FOLLOWING on many partitions
select count(distinct t1.c1) from
(select sum(id) over (partition by day order by tinyint_col
range between unbounded preceding and unbounded following) c1
from alltypesagg) t1;
---- RESULTS
11
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-1296: Fix DCHECK when unnecessary buffered tuple (for sum() analytic) not null
select
sum(month) over (),
dense_rank() over (order by date_string_col)
from alltypes where id < 5
---- RESULTS
5,1
5,1
5,1
5,1
5,1
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# IMPALA-1292: Incorrect result in analytic SUM when ORDER BY column is null
select tinyint_col, id,
SUM(id) OVER (ORDER BY tinyint_col ASC, id ASC)
FROM alltypesagg
where (tinyint_col is NULL or tinyint_col < 2) and id < 100 order by 1, 2
---- RESULTS
1,1,1
1,11,12
1,21,33
1,31,64
1,41,105
1,51,156
1,61,217
1,71,288
1,81,369
1,91,460
NULL,0,460
NULL,0,460
NULL,10,480
NULL,10,480
NULL,20,520
NULL,20,520
NULL,30,580
NULL,30,580
NULL,40,660
NULL,40,660
NULL,50,760
NULL,50,760
NULL,60,880
NULL,60,880
NULL,70,1020
NULL,70,1020
NULL,80,1180
NULL,80,1180
NULL,90,1360
NULL,90,1360
---- TYPES
TINYINT, INT, BIGINT
====
---- QUERY
# IMPALA-1280: Crash running analytic with LEFT SEMI JOIN
select sum(t1.int_col) over (partition by t1.id order by t1.int_col, t1.month)
from alltypestiny t1
where exists (select tt1.month from alltypes tt1 where t1.int_col = tt1.smallint_col)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0
0
0
1
0
1
1
1
---- TYPES
BIGINT
====
---- QUERY
# Regression test for IMPALA-1223. Query with analytic query with right join.
# Need proper handling of NULLs.
SELECT SUM(t1.int_col) OVER () AS int_col_1 FROM alltypestiny t1
RIGHT JOIN alltypestiny t2 ON t2.tinyint_col = t1.bigint_col AND t2.id = t1.month;
---- RESULTS
0
0
0
0
0
0
0
0
---- TYPES
BIGINT
====
---- QUERY
# Testing crashes in buffered tuple stream related to IMPALA-1306.
SELECT AVG(t1.int_col) OVER () AS double_col_1
FROM alltypestiny t1
WHERE EXISTS
(SELECT t1.month AS int_col_1
FROM alltypestiny t1)
---- RESULTS
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
---- TYPES
DOUBLE
====
---- QUERY
# IMPALA-1312
SELECT
SUM(t1.int_col * t1.id) OVER (
ORDER BY t1.int_col * t1.id ASC, t3.year + t3.tinyint_col ASC ROWS
BETWEEN 66 PRECEDING AND 21 FOLLOWING) AS int_col_1
FROM alltypes t1
INNER JOIN alltypes t2 ON t2.month = t1.smallint_col
INNER JOIN alltypes t3 ON t3.bigint_col = t2.bigint_col
WHERE
t2.id <= t3.smallint_col + t2.tinyint_col
order by 1
limit 5
---- TYPES
BIGINT
---- RESULTS
22
23
24
25
26
====