mirror of
https://github.com/apache/impala.git
synced 2026-01-03 15:00:52 -05:00
Rejects queries during admission control if: * the largest (across all backends) min buffer reservation is greater than the query mem_limit or buffer_pool_limit * the sum of the min buffer reservations across the cluster is larger than the pool max mem resources There are some other interesting cases to consider later: * every per-backend min buffer reservation is less than the associated backend's process mem_limit; the current admission control code doesn't know about other backend's proc mem_limits. Also reduces minimum non-reservation memory (IMPALA-5810). See the JIRA for experimental results that show this slightly improves min memory requirements for small queries. One reason to tweak this is to compensate for the fact that BufferedBlockMgr didn't count small buffers against the BlockMgr limit, but BufferPool counts all buffers against it. Testing: * Adds new test cases in test_admission_controller.py * Adds BE tests in reservation-tracker-test for the reservation-util code. Change-Id: Iabe87ce8f460356cfe4d1be4d7092c5900f9d79b Reviewed-on: http://gerrit.cloudera.org:8080/7678 Reviewed-by: Matthew Jacobs <mj@cloudera.com> Tested-by: Impala Public Jenkins
2084 lines
60 KiB
Plaintext
2084 lines
60 KiB
Plaintext
====
|
|
---- QUERY
|
|
select year, month,
|
|
count(int_col) over (partition by year, month),
|
|
avg(int_col) over (partition by year, month),
|
|
avg(timestamp_col) over (partition by year, month),
|
|
min(string_col) over (partition by year, month),
|
|
max(string_col) over (partition by year, month)
|
|
from alltypessmall
|
|
where id % 4 = 0 and month != 1;
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
|
|
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
|
|
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
|
|
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
|
|
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
|
|
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
|
|
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
|
|
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
|
|
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
|
|
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
|
|
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
|
|
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
|
|
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
|
|
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
|
|
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
|
|
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
|
|
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
|
|
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
|
|
---- TYPES
|
|
INT, INT, BIGINT, DOUBLE, TIMESTAMP, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
select int_col,
|
|
count(int_col) over ()
|
|
from alltypessmall
|
|
where id % 4 = 0 and month = 1
|
|
order by int_col
|
|
---- RESULTS
|
|
0,7
|
|
0,7
|
|
2,7
|
|
4,7
|
|
4,7
|
|
6,7
|
|
8,7
|
|
---- TYPES
|
|
INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
select tinyint_col,
|
|
count(int_col) over (order by tinyint_col)
|
|
from alltypessmall
|
|
where month = 1 and tinyint_col < 5
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,3
|
|
0,3
|
|
0,3
|
|
1,6
|
|
1,6
|
|
1,6
|
|
2,9
|
|
2,9
|
|
2,9
|
|
3,12
|
|
3,12
|
|
3,12
|
|
4,15
|
|
4,15
|
|
4,15
|
|
---- TYPES
|
|
TINYINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test ordering expressions having NULL values
|
|
select tinyint_col, smallint_col, int_col,
|
|
sum(smallint_col) over (order by tinyint_col nulls first)
|
|
from alltypesagg
|
|
where year = 2010 and month = 1 and day = 4 and id <= 3030 and bool_col = true
|
|
order by tinyint_col, id, smallint_col, int_col
|
|
---- RESULTS
|
|
2,2,2,96
|
|
2,12,12,96
|
|
2,22,22,96
|
|
4,4,4,138
|
|
4,14,14,138
|
|
4,24,24,138
|
|
6,6,6,186
|
|
6,16,16,186
|
|
6,26,26,186
|
|
8,8,8,240
|
|
8,18,18,240
|
|
8,28,28,240
|
|
NULL,NULL,NULL,60
|
|
NULL,10,10,60
|
|
NULL,20,20,60
|
|
NULL,30,30,60
|
|
---- TYPES
|
|
TINYINT, SMALLINT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test partitions and ordering expressions having NULL values for both
|
|
# the default window and ROWS windows.
|
|
select tinyint_col, smallint_col,
|
|
sum(smallint_col) over (partition by tinyint_col order by smallint_col),
|
|
sum(smallint_col) over (partition by tinyint_col order by smallint_col
|
|
rows between unbounded preceding and 1 following),
|
|
sum(smallint_col) over (partition by tinyint_col order by smallint_col
|
|
rows between 1 following and 2 following)
|
|
from alltypesagg
|
|
where year = 2010 and month = 1 and day = 4 and id <= 3200
|
|
and (smallint_col < 50 or smallint_col is NULL)
|
|
and (tinyint_col = 1 or tinyint_col is NULL)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
1,1,2,2,12
|
|
1,1,2,13,22
|
|
1,11,24,24,32
|
|
1,11,24,45,42
|
|
1,21,66,66,52
|
|
1,21,66,97,62
|
|
1,31,128,128,72
|
|
1,31,128,169,82
|
|
1,41,210,210,41
|
|
1,41,210,210,NULL
|
|
NULL,10,20,20,30
|
|
NULL,10,20,40,40
|
|
NULL,20,60,60,50
|
|
NULL,20,60,90,60
|
|
NULL,30,120,120,70
|
|
NULL,30,120,160,80
|
|
NULL,40,200,200,40
|
|
NULL,40,200,200,NULL
|
|
NULL,NULL,200,200,NULL
|
|
NULL,NULL,200,200,NULL
|
|
NULL,NULL,200,200,NULL
|
|
---- TYPES
|
|
TINYINT, SMALLINT, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test sum() NULL handling with different sliding windows (sum() has special NULL
|
|
# handling in Remove()).
|
|
select
|
|
sum(tinyint_col) over (order by id nulls last rows between 1 preceding and 1 preceding),
|
|
sum(tinyint_col) over (order by id nulls last rows between 1 preceding and current row),
|
|
sum(tinyint_col) over (order by id nulls last rows between current row and 1 following),
|
|
sum(tinyint_col) over (order by id nulls last rows between 1 following and 1 following)
|
|
from alltypesagg where id < 30 and (tinyint_col is null or tinyint_col < 6)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
NULL,NULL,NULL,NULL
|
|
NULL,NULL,1,1
|
|
NULL,1,3,2
|
|
1,3,5,3
|
|
2,5,7,4
|
|
3,7,9,5
|
|
4,9,5,NULL
|
|
5,5,NULL,NULL
|
|
NULL,NULL,1,1
|
|
NULL,1,3,2
|
|
1,3,5,3
|
|
2,5,7,4
|
|
3,7,9,5
|
|
4,9,5,NULL
|
|
5,5,NULL,NULL
|
|
NULL,NULL,1,1
|
|
NULL,1,3,2
|
|
1,3,5,3
|
|
2,5,7,4
|
|
3,7,9,5
|
|
4,9,5,NULL
|
|
---- TYPES
|
|
BIGINT, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test multiple partition exprs and multiple order by exprs
|
|
select year, month, bool_col, tinyint_col,
|
|
count(id) over (partition by year, month order by bool_col, tinyint_col)
|
|
from alltypes
|
|
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
|
|
order by year, month, bool_col, tinyint_col
|
|
---- RESULTS
|
|
2009,1,false,1,4
|
|
2009,1,false,1,4
|
|
2009,1,false,1,4
|
|
2009,1,false,1,4
|
|
2009,1,true,0,8
|
|
2009,1,true,0,8
|
|
2009,1,true,0,8
|
|
2009,1,true,0,8
|
|
2009,2,false,1,2
|
|
2009,2,false,1,2
|
|
2009,2,true,0,4
|
|
2009,2,true,0,4
|
|
2010,1,false,1,3
|
|
2010,1,false,1,3
|
|
2010,1,false,1,3
|
|
2010,1,true,0,6
|
|
2010,1,true,0,6
|
|
2010,1,true,0,6
|
|
2010,2,false,1,3
|
|
2010,2,false,1,3
|
|
2010,2,false,1,3
|
|
2010,2,true,0,6
|
|
2010,2,true,0,6
|
|
2010,2,true,0,6
|
|
---- TYPES
|
|
INT, INT, BOOLEAN, TINYINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Same as above but with no ordering and a limit. Only count the number of results
|
|
# because they are non-deterministic.
|
|
select count(*) from
|
|
(select year, month, bool_col, tinyint_col,
|
|
count(id) over (partition by year, month order by bool_col, tinyint_col)
|
|
from alltypes
|
|
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
|
|
limit 7) v
|
|
---- RESULTS
|
|
7
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Simple ROWS window
|
|
select (month % 2), int_col,
|
|
sum(int_col) over (partition by (month % 2)
|
|
order by int_col rows between unbounded preceding and current row)
|
|
from alltypestiny
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,0,0
|
|
0,0,0
|
|
0,1,1
|
|
0,1,2
|
|
1,0,0
|
|
1,0,0
|
|
1,1,1
|
|
1,1,2
|
|
---- TYPES
|
|
INT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# ROWS windows with an end boundary offset, some before or after all results.
|
|
select
|
|
count(tinyint_col) over (partition by bool_col order by tinyint_col
|
|
rows between unbounded preceding and 1 preceding),
|
|
sum(tinyint_col) over (partition by bool_col order by tinyint_col
|
|
rows between unbounded preceding and 1 following),
|
|
sum(tinyint_col) over (partition by bool_col order by tinyint_col
|
|
rows between unbounded preceding and 3 preceding),
|
|
sum(tinyint_col) over (partition by bool_col order by tinyint_col
|
|
rows between unbounded preceding and 20 following),
|
|
sum(tinyint_col) over (partition by bool_col order by tinyint_col
|
|
rows between unbounded preceding and 20 preceding)
|
|
from alltypesagg where id <= 10 and day = 1
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,4,NULL,25,NULL
|
|
1,9,NULL,25,NULL
|
|
2,16,NULL,25,NULL
|
|
3,25,1,25,NULL
|
|
4,25,4,25,NULL
|
|
0,6,NULL,20,NULL
|
|
1,12,NULL,20,NULL
|
|
2,20,NULL,20,NULL
|
|
3,20,2,20,NULL
|
|
4,20,6,20,NULL
|
|
4,20,12,20,NULL
|
|
---- TYPES
|
|
BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# multiple analytics in the same select block
|
|
select tinyint_col, int_col, bigint_col,
|
|
count(bigint_col)
|
|
over(partition by tinyint_col, bigint_col order by int_col desc
|
|
rows between unbounded preceding and 1 following),
|
|
max(tinyint_col)
|
|
over(partition by bigint_col, tinyint_col order by int_col desc
|
|
rows between unbounded preceding and 1 following),
|
|
# different window
|
|
min(int_col)
|
|
over(partition by bigint_col, tinyint_col order by int_col desc
|
|
rows between unbounded preceding and 2 following),
|
|
# different sort order but same partition
|
|
max(int_col)
|
|
over(partition by bigint_col, tinyint_col order by int_col asc
|
|
rows between unbounded preceding and 2 following),
|
|
# different partition
|
|
sum(int_col)
|
|
over(partition by tinyint_col order by int_col desc
|
|
rows between unbounded preceding and 2 following)
|
|
from functional.alltypesagg
|
|
where id < 20
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
1,1,10,1,1,1,1,12
|
|
1,11,110,1,1,11,11,12
|
|
2,2,20,1,2,2,2,14
|
|
2,12,120,1,2,12,12,14
|
|
3,3,30,1,3,3,3,16
|
|
3,13,130,1,3,13,13,16
|
|
4,4,40,1,4,4,4,18
|
|
4,14,140,1,4,14,14,18
|
|
5,5,50,1,5,5,5,20
|
|
5,15,150,1,5,15,15,20
|
|
6,6,60,1,6,6,6,22
|
|
6,16,160,1,6,16,16,22
|
|
7,7,70,1,7,7,7,24
|
|
7,17,170,1,7,17,17,24
|
|
8,8,80,1,8,8,8,26
|
|
8,18,180,1,8,18,18,26
|
|
9,9,90,1,9,9,9,28
|
|
9,19,190,1,9,19,19,28
|
|
NULL,10,100,2,NULL,10,10,20
|
|
NULL,10,100,2,NULL,10,10,20
|
|
NULL,NULL,NULL,0,NULL,NULL,NULL,10
|
|
NULL,NULL,NULL,0,NULL,NULL,NULL,20
|
|
---- TYPES
|
|
TINYINT, INT, BIGINT, BIGINT, TINYINT, INT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# analytics on a grouped aggregation with a final order by
|
|
select bool_col,
|
|
sum(min(int_col))
|
|
over(partition by min(tinyint_col) order by max(int_col)
|
|
rows between unbounded preceding and 1 following),
|
|
max(sum(bigint_col))
|
|
over(partition by min(tinyint_col) order by max(int_col)
|
|
rows between unbounded preceding and 1 following),
|
|
min(sum(bigint_col))
|
|
over(partition by min(tinyint_col) order by sum(int_col)
|
|
rows between unbounded preceding and 4 following)
|
|
from functional.alltypes
|
|
group by 1
|
|
order by 1, 2, 3
|
|
---- RESULTS
|
|
false,1,182500,182500
|
|
true,0,146000,146000
|
|
---- TYPES
|
|
BOOLEAN, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-1200
|
|
select DISTINCT t1.c1 FROM
|
|
(SELECT MAX(smallint_col) OVER (PARTITION BY year ORDER BY id DESC) AS c1
|
|
FROM alltypesagg) t1
|
|
---- RESULTS
|
|
99
|
|
---- TYPES
|
|
SMALLINT
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-1233
|
|
with with_1 AS (
|
|
SELECT RANK() OVER (ORDER BY t1.id) AS int_col_1
|
|
FROM alltypesagg t1)
|
|
SELECT COUNT(t1.int_col_1) AS int_col_1
|
|
FROM with_1 t1
|
|
WHERE t1.int_col_1 IS NULL
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select tinyint_col, id,
|
|
row_number() over (order by tinyint_col, id) total_order,
|
|
row_number() over (partition by tinyint_col order by id) partitioned_order,
|
|
first_value(id) over (partition by tinyint_col order by id),
|
|
first_value(cast(id as string)) over (partition by tinyint_col order by id),
|
|
last_value(id) over (partition by tinyint_col order by id),
|
|
last_value(cast(id as string)) over (partition by tinyint_col order by id),
|
|
last_value(id) over (partition by tinyint_col order by id
|
|
rows between unbounded preceding and unbounded following)
|
|
from alltypes where id < 25 order by tinyint_col, id
|
|
---- RESULTS
|
|
0,0,1,1,0,'0',0,'0',20
|
|
0,10,2,2,0,'0',10,'10',20
|
|
0,20,3,3,0,'0',20,'20',20
|
|
1,1,4,1,1,'1',1,'1',21
|
|
1,11,5,2,1,'1',11,'11',21
|
|
1,21,6,3,1,'1',21,'21',21
|
|
2,2,7,1,2,'2',2,'2',22
|
|
2,12,8,2,2,'2',12,'12',22
|
|
2,22,9,3,2,'2',22,'22',22
|
|
3,3,10,1,3,'3',3,'3',23
|
|
3,13,11,2,3,'3',13,'13',23
|
|
3,23,12,3,3,'3',23,'23',23
|
|
4,4,13,1,4,'4',4,'4',24
|
|
4,14,14,2,4,'4',14,'14',24
|
|
4,24,15,3,4,'4',24,'24',24
|
|
5,5,16,1,5,'5',5,'5',15
|
|
5,15,17,2,5,'5',15,'15',15
|
|
6,6,18,1,6,'6',6,'6',16
|
|
6,16,19,2,6,'6',16,'16',16
|
|
7,7,20,1,7,'7',7,'7',17
|
|
7,17,21,2,7,'7',17,'17',17
|
|
8,8,22,1,8,'8',8,'8',18
|
|
8,18,23,2,8,'8',18,'18',18
|
|
9,9,24,1,9,'9',9,'9',19
|
|
9,19,25,2,9,'9',19,'19',19
|
|
---- TYPES
|
|
TINYINT, INT, BIGINT, BIGINT, INT, STRING, INT, STRING, INT
|
|
====
|
|
---- QUERY
|
|
select bool_col, id,
|
|
lag(id, 2, -12345) over (partition by bool_col order by id),
|
|
lead(id, 2, -12345) over (partition by bool_col order by id)
|
|
from alltypestiny
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
false,1,-12345,5
|
|
false,3,-12345,7
|
|
false,5,1,-12345
|
|
false,7,3,-12345
|
|
true,0,-12345,4
|
|
true,2,-12345,6
|
|
true,4,0,-12345
|
|
true,6,2,-12345
|
|
---- TYPES
|
|
BOOLEAN, INT, INT, INT
|
|
====
|
|
---- QUERY
|
|
# Tests rank() and dense_rank()
|
|
select year, month, bool_col, tinyint_col,
|
|
rank() over (partition by year, month order by bool_col, tinyint_col),
|
|
dense_rank() over (partition by year, month order by bool_col, tinyint_col)
|
|
from alltypes
|
|
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
|
|
order by year, month, bool_col, tinyint_col
|
|
---- RESULTS
|
|
2009,1,false,1,1,1
|
|
2009,1,false,1,1,1
|
|
2009,1,false,1,1,1
|
|
2009,1,false,1,1,1
|
|
2009,1,true,0,5,2
|
|
2009,1,true,0,5,2
|
|
2009,1,true,0,5,2
|
|
2009,1,true,0,5,2
|
|
2009,2,false,1,1,1
|
|
2009,2,false,1,1,1
|
|
2009,2,true,0,3,2
|
|
2009,2,true,0,3,2
|
|
2010,1,false,1,1,1
|
|
2010,1,false,1,1,1
|
|
2010,1,false,1,1,1
|
|
2010,1,true,0,4,2
|
|
2010,1,true,0,4,2
|
|
2010,1,true,0,4,2
|
|
2010,2,false,1,1,1
|
|
2010,2,false,1,1,1
|
|
2010,2,false,1,1,1
|
|
2010,2,true,0,4,2
|
|
2010,2,true,0,4,2
|
|
2010,2,true,0,4,2
|
|
---- TYPES
|
|
INT, INT, BOOLEAN, TINYINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# analytics with inline views
|
|
select bool_col, a, b, a+b from
|
|
(select
|
|
bool_col,
|
|
count(int_col) over() a,
|
|
sum(int_col + bigint_col) over(partition by bool_col) b
|
|
from
|
|
(select * from functional.alltypes where id < 20) v1) v2
|
|
order by 1, 2, 3
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
false,20,550,570
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
true,20,440,460
|
|
---- TYPES
|
|
BOOLEAN, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# analytics and unions
|
|
select 1 a, min(id) over (partition by int_col)
|
|
from functional.alltypes where id < 5
|
|
union distinct
|
|
select 2, max(id) over (partition by bool_col)
|
|
from functional.alltypessmall where id >= 5 and id <= 10
|
|
union all
|
|
(select 3, sum(bigint_col) over (partition by int_col order by id)
|
|
from functional.alltypestiny where bool_col = false)
|
|
order by 1, 2 desc nulls first
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
1,4
|
|
1,3
|
|
1,2
|
|
1,1
|
|
1,0
|
|
2,10
|
|
2,9
|
|
3,40
|
|
3,30
|
|
3,20
|
|
3,10
|
|
---- TYPES
|
|
TINYINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# analytics in an uncorrelated subquery
|
|
select id, int_col, bool_col from functional.alltypestiny t1
|
|
where int_col in
|
|
(select min(bigint_col) over(partition by bool_col)
|
|
from functional.alltypessmall t2 where t2.id < 10)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,0,true
|
|
2,0,true
|
|
4,0,true
|
|
6,0,true
|
|
---- TYPES
|
|
INT, INT, BOOLEAN
|
|
====
|
|
---- QUERY
|
|
# test conjunct evaluation in analytic nodes
|
|
select * from
|
|
(select bool_col, tinyint_col,
|
|
sum(tinyint_col) over (partition by bool_col order by tinyint_col
|
|
rows between unbounded preceding and 1 following) a
|
|
from alltypesagg where id <= 10 and day = 1) v
|
|
where
|
|
v.tinyint_col * 3 > v.a and
|
|
cast(v.a as boolean) = v.bool_col and
|
|
v.a is not null
|
|
---- RESULTS
|
|
true,8,20
|
|
---- TYPES
|
|
BOOLEAN, TINYINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test ROWS windows with start boundaries
|
|
select id,
|
|
count(id) over (order by id rows between 3 preceding and 3 preceding) c1,
|
|
count(id) over (order by id rows between 3 preceding and 2 preceding) c2,
|
|
count(id) over (order by id rows between 3 preceding and 1 preceding) c3,
|
|
count(id) over (order by id rows between 3 preceding and current row) c4,
|
|
count(id) over (order by id rows between 3 preceding and 1 following) c5,
|
|
count(id) over (order by id rows between 3 preceding and 2 following) c6,
|
|
count(id) over (order by id rows between 3 preceding and 3 following) c7,
|
|
count(id) over (order by id rows between 2 preceding and 3 following) c8,
|
|
count(id) over (order by id rows between 1 preceding and 3 following) c9,
|
|
count(id) over (order by id rows between current row and 3 following) c10,
|
|
count(id) over (order by id rows between 1 following and 3 following) c11,
|
|
count(id) over (order by id rows between 2 following and 3 following) c12,
|
|
count(id) over (order by id rows between 3 following and 3 following) c13
|
|
from alltypes where id < 8
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,0,0,0,1,2,3,4,4,4,4,3,2,1
|
|
1,0,0,1,2,3,4,5,5,5,4,3,2,1
|
|
2,0,1,2,3,4,5,6,6,5,4,3,2,1
|
|
3,1,2,3,4,5,6,7,6,5,4,3,2,1
|
|
4,1,2,3,4,5,6,7,6,5,4,3,2,1
|
|
5,1,2,3,4,5,6,6,5,4,3,2,1,0
|
|
6,1,2,3,4,5,5,5,4,3,2,1,0,0
|
|
7,1,2,3,4,4,4,4,3,2,1,0,0,0
|
|
---- TYPES
|
|
INT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test sum() and avg() removing values
|
|
select id,
|
|
sum(int_col) over (order by id rows between 1 preceding and 1 following),
|
|
sum(double_col) over (order by id rows between 3 preceding and 2 preceding),
|
|
avg(int_col) over (order by id rows between 1 preceding and 1 following),
|
|
avg(double_col) over (order by id rows between 3 preceding and 2 preceding),
|
|
avg(timestamp_col) over (order by id rows between 2 following and 3 following)
|
|
from alltypes where id < 8
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,1,NULL,0.5,NULL,2009-01-01 00:02:30.199999809
|
|
1,3,NULL,1,NULL,2009-01-01 00:03:30.449999809
|
|
2,6,0,2,0,2009-01-01 00:04:30.349999904
|
|
3,9,10.1,3,5.05,2009-01-01 00:05:30.124999999
|
|
4,12,30.3,4,15.15,2009-01-01 00:06:30.180000066
|
|
5,15,50.49999999999999,5,25.25,2009-01-01 00:07:00.210000038
|
|
6,18,70.69999999999999,6,35.34999999999999,NULL
|
|
7,13,90.89999999999999,6.5,45.45,NULL
|
|
---- TYPES
|
|
INT, BIGINT, DOUBLE, DOUBLE, DOUBLE, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# More testing of start bounds. This exposed a bug in removing
|
|
# values from the window after the partition.
|
|
select tinyint_col, int_col,
|
|
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 2 following and 3 following),
|
|
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 2 following and 10 following),
|
|
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 6 following and 100 following)
|
|
from alltypesagg
|
|
where (tinyint_col is NULL or tinyint_col < 3) and id < 60;
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
NULL,10,40,280,180
|
|
NULL,10,50,260,140
|
|
NULL,20,60,240,100
|
|
NULL,20,70,210,50
|
|
NULL,30,80,180,NULL
|
|
NULL,30,90,140,NULL
|
|
NULL,40,100,100,NULL
|
|
NULL,40,50,50,NULL
|
|
NULL,50,NULL,NULL,NULL
|
|
NULL,50,NULL,NULL,NULL
|
|
NULL,NULL,NULL,NULL,NULL
|
|
NULL,NULL,NULL,NULL,NULL
|
|
1,1,52,144,NULL
|
|
1,11,72,123,NULL
|
|
1,21,92,92,NULL
|
|
1,31,51,51,NULL
|
|
1,41,NULL,NULL,NULL
|
|
1,51,NULL,NULL,NULL
|
|
2,2,54,148,NULL
|
|
2,12,74,126,NULL
|
|
2,22,94,94,NULL
|
|
2,32,52,52,NULL
|
|
2,42,NULL,NULL,NULL
|
|
2,52,NULL,NULL,NULL
|
|
---- TYPES
|
|
TINYINT, INT, BIGINT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1273: Incorrect results with very large window and small table
|
|
select
|
|
count(int_col) over (order by bigint_col rows between 95 preceding and 40 following)
|
|
from alltypestiny
|
|
---- RESULTS
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select count(distinct t1.c1) from
|
|
(select count(id) over (order by id rows between 550 preceding and 550 following) c1
|
|
from alltypesagg) t1
|
|
---- RESULTS
|
|
551
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select count(distinct t1.c1) from
|
|
(select count(id) over (order by id rows between 1050 preceding and 2 following) c1
|
|
from alltypesagg) t1
|
|
---- RESULTS
|
|
1051
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select count(distinct t1.c1) from
|
|
(select count(id) over (order by id rows between 1050 preceding and 500 preceding) c1
|
|
from alltypesagg) t1
|
|
---- RESULTS
|
|
552
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select count(distinct t1.c1) from
|
|
(select count(id) over (order by id rows between 2 following and 1050 following) c1
|
|
from alltypesagg) t1
|
|
---- RESULTS
|
|
1050
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select count(distinct t1.c1) from
|
|
(select count(id) over (order by id rows between 500 following and 1050 following) c1
|
|
from alltypesagg) t1
|
|
---- RESULTS
|
|
552
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select (tinyint_col % 4), id,
|
|
count(id) over (partition by (tinyint_col % 4) order by id
|
|
rows between 3 following and 10 following)
|
|
from alltypes where id < 20
|
|
order by (tinyint_col % 4), id
|
|
---- RESULTS
|
|
0,0,3
|
|
0,4,2
|
|
0,8,1
|
|
0,10,0
|
|
0,14,0
|
|
0,18,0
|
|
1,1,3
|
|
1,5,2
|
|
1,9,1
|
|
1,11,0
|
|
1,15,0
|
|
1,19,0
|
|
2,2,1
|
|
2,6,0
|
|
2,12,0
|
|
2,16,0
|
|
3,3,1
|
|
3,7,0
|
|
3,13,0
|
|
3,17,0
|
|
---- TYPES
|
|
TINYINT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test IMPALA-1269: ROWS windows with offset to current row might not remove
|
|
# correct values after new partitions
|
|
select sum(t2.int_col_1) from
|
|
(select SUM(t1.int_col) OVER (PARTITION BY t1.smallint_col ORDER BY t1.day, t1.int_col
|
|
ROWS 7 PRECEDING) AS int_col_1
|
|
FROM alltypesagg t1 ) t2
|
|
---- RESULTS
|
|
41790600
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test FIRST_VALUE with strings and null handling
|
|
select cast(floor(id/3) as int), tinyint_col,
|
|
first_value(tinyint_col) over (partition by cast(floor(id/3) as int) order by tinyint_col nulls first),
|
|
first_value(cast(tinyint_col as string)) over (partition by cast(floor(id/3) as int) order by cast(tinyint_col as string) nulls first),
|
|
last_value(tinyint_col) over (partition by cast(floor(id/3) as int) order by tinyint_col nulls first),
|
|
last_value(cast(tinyint_col as string)) over (partition by cast(floor(id/3) as int) order by cast(tinyint_col as string) nulls first)
|
|
from alltypesagg where id < 50 and (tinyint_col is null or tinyint_col < 4)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,NULL,NULL,'NULL',NULL,'NULL'
|
|
0,NULL,NULL,'NULL',NULL,'NULL'
|
|
0,1,NULL,'NULL',1,'1'
|
|
0,2,NULL,'NULL',2,'2'
|
|
1,3,3,'3',3,'3'
|
|
3,NULL,NULL,'NULL',NULL,'NULL'
|
|
3,NULL,NULL,'NULL',NULL,'NULL'
|
|
3,1,NULL,'NULL',1,'1'
|
|
4,2,2,'2',2,'2'
|
|
4,3,2,'2',3,'3'
|
|
6,NULL,NULL,'NULL',NULL,'NULL'
|
|
6,NULL,NULL,'NULL',NULL,'NULL'
|
|
7,1,1,'1',1,'1'
|
|
7,2,1,'1',2,'2'
|
|
7,3,1,'1',3,'3'
|
|
10,NULL,NULL,'NULL',NULL,'NULL'
|
|
10,NULL,NULL,'NULL',NULL,'NULL'
|
|
10,1,NULL,'NULL',1,'1'
|
|
10,2,NULL,'NULL',2,'2'
|
|
11,3,3,'3',3,'3'
|
|
13,NULL,NULL,'NULL',NULL,'NULL'
|
|
13,NULL,NULL,'NULL',NULL,'NULL'
|
|
13,1,NULL,'NULL',1,'1'
|
|
14,2,2,'2',2,'2'
|
|
14,3,2,'2',3,'3'
|
|
---- TYPES
|
|
INT, TINYINT, TINYINT, STRING, TINYINT, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1302: Incorrect result of FIRST_VALUE query.
|
|
select
|
|
first_value(id) over (order by id rows between 3 preceding and 3 following),
|
|
first_value(id) over (order by id rows between 3 preceding and 2 following),
|
|
first_value(id) over (order by id rows between 3 preceding and 1 following),
|
|
first_value(id) over (order by id rows between 3 preceding and current row),
|
|
first_value(id) over (order by id rows between 3 preceding and 1 preceding),
|
|
first_value(id) over (order by id rows between 3 preceding and 2 preceding),
|
|
first_value(id) over (order by id rows between 3 preceding and 3 preceding),
|
|
first_value(id) over (order by id rows between 2 preceding and 3 following),
|
|
first_value(id) over (order by id rows between 1 preceding and 3 following),
|
|
first_value(id) over (order by id rows between current row and 3 following),
|
|
first_value(id) over (order by id rows between 1 following and 3 following),
|
|
first_value(id) over (order by id rows between 2 following and 3 following),
|
|
first_value(id) over (order by id rows between 3 following and 3 following),
|
|
first_value(id) over (order by id rows between 15 preceding and 15 preceding),
|
|
first_value(id) over (order by id rows between 15 preceding and 15 following),
|
|
first_value(id) over (order by id rows between 15 preceding and 6 preceding),
|
|
first_value(id) over (order by id rows between 6 preceding and 15 following),
|
|
first_value(id) over (order by id rows between 15 following and 15 following),
|
|
first_value(id) over (order by id rows between unbounded preceding and 8 preceding),
|
|
last_value(id) over (order by id rows between 3 preceding and 3 following),
|
|
last_value(id) over (order by id rows between 3 preceding and 2 following),
|
|
last_value(id) over (order by id rows between 3 preceding and 1 following),
|
|
last_value(id) over (order by id rows between 3 preceding and current row),
|
|
last_value(id) over (order by id rows between 3 preceding and 1 preceding),
|
|
last_value(id) over (order by id rows between 3 preceding and 2 preceding),
|
|
last_value(id) over (order by id rows between 3 preceding and 3 preceding),
|
|
last_value(id) over (order by id rows between 2 preceding and 3 following),
|
|
last_value(id) over (order by id rows between 1 preceding and 3 following),
|
|
last_value(id) over (order by id rows between current row and 3 following),
|
|
last_value(id) over (order by id rows between 1 following and 3 following),
|
|
last_value(id) over (order by id rows between 2 following and 3 following),
|
|
last_value(id) over (order by id rows between 3 following and 3 following),
|
|
last_value(id) over (order by id rows between 15 preceding and 15 preceding),
|
|
last_value(id) over (order by id rows between 15 preceding and 15 following),
|
|
last_value(id) over (order by id rows between 15 preceding and 6 preceding),
|
|
last_value(id) over (order by id rows between 6 preceding and 15 following),
|
|
last_value(id) over (order by id rows between 15 following and 15 following),
|
|
last_value(id) over (order by id rows between 8 following and unbounded following)
|
|
from alltypes where id < 10 order by id;
|
|
---- RESULTS
|
|
0,0,0,0,NULL,NULL,NULL,0,0,0,1,2,3,NULL,0,NULL,0,NULL,NULL,3,2,1,0,NULL,NULL,NULL,3,3,3,3,3,3,NULL,9,NULL,9,NULL,9
|
|
0,0,0,0,0,NULL,NULL,0,0,1,2,3,4,NULL,0,NULL,0,NULL,NULL,4,3,2,1,0,NULL,NULL,4,4,4,4,4,4,NULL,9,NULL,9,NULL,9
|
|
0,0,0,0,0,0,NULL,0,1,2,3,4,5,NULL,0,NULL,0,NULL,NULL,5,4,3,2,1,0,NULL,5,5,5,5,5,5,NULL,9,NULL,9,NULL,NULL
|
|
0,0,0,0,0,0,0,1,2,3,4,5,6,NULL,0,NULL,0,NULL,NULL,6,5,4,3,2,1,0,6,6,6,6,6,6,NULL,9,NULL,9,NULL,NULL
|
|
1,1,1,1,1,1,1,2,3,4,5,6,7,NULL,0,NULL,0,NULL,NULL,7,6,5,4,3,2,1,7,7,7,7,7,7,NULL,9,NULL,9,NULL,NULL
|
|
2,2,2,2,2,2,2,3,4,5,6,7,8,NULL,0,NULL,0,NULL,NULL,8,7,6,5,4,3,2,8,8,8,8,8,8,NULL,9,NULL,9,NULL,NULL
|
|
3,3,3,3,3,3,3,4,5,6,7,8,9,NULL,0,0,0,NULL,NULL,9,8,7,6,5,4,3,9,9,9,9,9,9,NULL,9,0,9,NULL,NULL
|
|
4,4,4,4,4,4,4,5,6,7,8,9,NULL,NULL,0,0,1,NULL,NULL,9,9,8,7,6,5,4,9,9,9,9,9,NULL,NULL,9,1,9,NULL,NULL
|
|
5,5,5,5,5,5,5,6,7,8,9,NULL,NULL,NULL,0,0,2,NULL,0,9,9,9,8,7,6,5,9,9,9,9,NULL,NULL,NULL,9,2,9,NULL,NULL
|
|
6,6,6,6,6,6,6,7,8,9,NULL,NULL,NULL,NULL,0,0,3,NULL,0,9,9,9,9,8,7,6,9,9,9,NULL,NULL,NULL,NULL,9,3,9,NULL,NULL
|
|
---- TYPES
|
|
INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT
|
|
====
|
|
---- QUERY
|
|
# Test RANGE with UNBOUNDED FOLLOWING
|
|
select id,
|
|
sum(id) over (order by id range between unbounded preceding and unbounded following)
|
|
from alltypesagg where id <= 30 order by id;
|
|
---- RESULTS
|
|
0,525
|
|
0,525
|
|
1,525
|
|
2,525
|
|
3,525
|
|
4,525
|
|
5,525
|
|
6,525
|
|
7,525
|
|
8,525
|
|
9,525
|
|
10,525
|
|
10,525
|
|
11,525
|
|
12,525
|
|
13,525
|
|
14,525
|
|
15,525
|
|
16,525
|
|
17,525
|
|
18,525
|
|
19,525
|
|
20,525
|
|
20,525
|
|
21,525
|
|
22,525
|
|
23,525
|
|
24,525
|
|
25,525
|
|
26,525
|
|
27,525
|
|
28,525
|
|
29,525
|
|
30,525
|
|
30,525
|
|
---- TYPES
|
|
INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1293: DCHECK failure with window ROWS BETWEEN UNBOUNDED PRECEDING
|
|
# AND <large> PRECEDING
|
|
select count(*), count(t1.c1) from
|
|
(select SUM(tinyint_col) OVER (PARTITION BY bigint_col ORDER BY tinyint_col ASC,
|
|
id ASC, timestamp_col ASC, day ASC
|
|
ROWS BETWEEN UNBOUNDED PRECEDING AND 11 PRECEDING) c1
|
|
FROM alltypesagg) t1
|
|
---- RESULTS
|
|
11000,0
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test RANGE with UNBOUNDED FOLLOWING on many partitions
|
|
select count(distinct t1.c1) from
|
|
(select sum(id) over (partition by day order by tinyint_col
|
|
range between unbounded preceding and unbounded following) c1
|
|
from alltypesagg) t1;
|
|
---- RESULTS
|
|
11
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1296: Fix DCHECK when unnecessary buffered tuple (for sum() analytic) not null
|
|
select
|
|
sum(month) over (),
|
|
dense_rank() over (order by date_string_col)
|
|
from alltypes where id < 5
|
|
---- RESULTS
|
|
5,1
|
|
5,1
|
|
5,1
|
|
5,1
|
|
5,1
|
|
---- TYPES
|
|
BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1292: Incorrect result in analytic SUM when ORDER BY column is null
|
|
select tinyint_col, id,
|
|
SUM(id) OVER (ORDER BY tinyint_col ASC, id ASC)
|
|
FROM alltypesagg
|
|
where (tinyint_col is NULL or tinyint_col < 2) and id < 100 order by 1, 2
|
|
---- RESULTS
|
|
1,1,1
|
|
1,11,12
|
|
1,21,33
|
|
1,31,64
|
|
1,41,105
|
|
1,51,156
|
|
1,61,217
|
|
1,71,288
|
|
1,81,369
|
|
1,91,460
|
|
NULL,0,460
|
|
NULL,0,460
|
|
NULL,10,480
|
|
NULL,10,480
|
|
NULL,20,520
|
|
NULL,20,520
|
|
NULL,30,580
|
|
NULL,30,580
|
|
NULL,40,660
|
|
NULL,40,660
|
|
NULL,50,760
|
|
NULL,50,760
|
|
NULL,60,880
|
|
NULL,60,880
|
|
NULL,70,1020
|
|
NULL,70,1020
|
|
NULL,80,1180
|
|
NULL,80,1180
|
|
NULL,90,1360
|
|
NULL,90,1360
|
|
---- TYPES
|
|
TINYINT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1280: Crash running analytic with LEFT SEMI JOIN
|
|
select sum(t1.int_col) over (partition by t1.id order by t1.int_col, t1.month)
|
|
from alltypestiny t1
|
|
where exists (select tt1.month from alltypes tt1 where t1.int_col = tt1.smallint_col)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0
|
|
0
|
|
0
|
|
1
|
|
0
|
|
1
|
|
1
|
|
1
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-1223. Query with analytic query with right join.
|
|
# Need proper handling of NULLs.
|
|
SELECT SUM(t1.int_col) OVER () AS int_col_1 FROM alltypestiny t1
|
|
RIGHT JOIN alltypestiny t2 ON t2.tinyint_col = t1.bigint_col AND t2.id = t1.month;
|
|
---- RESULTS
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
## IMPALA-1312: Fix DCHECK failure in BufferedTupleStream
|
|
## Disabled due to slow execution without codegen, breaks exhaustive runs (IMPALA-1414)
|
|
#SELECT
|
|
# SUM(t1.int_col * t1.id) OVER (
|
|
# ORDER BY t1.int_col * t1.id ASC, t3.year + t3.tinyint_col ASC ROWS
|
|
# BETWEEN 66 PRECEDING AND 21 FOLLOWING) AS int_col_1
|
|
#FROM alltypes t1
|
|
#INNER JOIN alltypes t2 ON t2.month = t1.smallint_col
|
|
#INNER JOIN alltypes t3 ON t3.bigint_col = t2.bigint_col
|
|
#WHERE
|
|
#t2.id <= t3.smallint_col + t2.tinyint_col
|
|
#order by 1
|
|
#limit 5
|
|
#---- TYPES
|
|
#BIGINT
|
|
#---- RESULTS
|
|
#22
|
|
#23
|
|
#24
|
|
#25
|
|
#26
|
|
#====
|
|
#----QUERY
|
|
# Testing crashes in buffered tuple stream related to IMPALA-1306.
|
|
SELECT AVG(t1.int_col) OVER () AS double_col_1
|
|
FROM alltypestiny t1
|
|
WHERE EXISTS
|
|
(SELECT t1.month AS int_col_1
|
|
FROM alltypestiny t1)
|
|
---- RESULTS
|
|
0.5
|
|
0.5
|
|
0.5
|
|
0.5
|
|
0.5
|
|
0.5
|
|
0.5
|
|
0.5
|
|
---- TYPES
|
|
DOUBLE
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1808: handle partition/order by exprs with NaN
|
|
select id / int_col,
|
|
count(*) over (partition by id / int_col order by id),
|
|
count(*) over (order by id / int_col),
|
|
count(*) over (order by id / int_col rows between 100 preceding and 90 preceding),
|
|
count(*) over (order by id / int_col rows between 2 preceding and 1 preceding),
|
|
count(*) over (order by id / int_col rows between 1 preceding and 1 following),
|
|
count(*) over (order by id / int_col rows between 1 following and 2 following),
|
|
count(*) over (order by id / int_col rows between 90 following and 100 following)
|
|
from alltypestiny
|
|
---- RESULTS
|
|
NaN,1,1,0,0,2,2,0
|
|
1,1,2,0,1,3,2,0
|
|
3,1,3,0,2,3,2,0
|
|
5,1,4,0,2,3,2,0
|
|
7,1,5,0,2,3,2,0
|
|
Infinity,1,8,0,2,3,2,0
|
|
Infinity,2,8,0,2,3,1,0
|
|
Infinity,3,8,0,2,2,0,0
|
|
---- TYPES
|
|
DOUBLE, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
|
|
====
|
|
----QUERY
|
|
# IMPALA-1483: Check that TupleIsNullPredicates referring to the logical
|
|
# analytic output are substituted with TupleIsNullPredicates referring to
|
|
# the physical output.
|
|
select t2.a, t2.id, t2.int_col
|
|
from functional.alltypes t1
|
|
left outer join
|
|
(select sum(int_col) over(order by id) a, id, int_col is null as int_col
|
|
from functional.alltypes
|
|
where year = 2009 and month between 1 and 2) t2
|
|
on (t1.id = t2.id)
|
|
where year = 2009 and month = 1
|
|
order by t2.id limit 10
|
|
---- RESULTS
|
|
0,0,false
|
|
1,1,false
|
|
3,2,false
|
|
6,3,false
|
|
10,4,false
|
|
15,5,false
|
|
21,6,false
|
|
28,7,false
|
|
36,8,false
|
|
45,9,false
|
|
---- TYPES
|
|
BIGINT, INT, BOOLEAN
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1542: Analytic fn containing arithmetic expr on NULL crashed
|
|
select lag(coalesce(505, 1 + NULL), 1) over (order by int_col desc)
|
|
from functional.alltypestiny
|
|
---- RESULTS
|
|
NULL
|
|
505
|
|
505
|
|
505
|
|
505
|
|
505
|
|
505
|
|
505
|
|
---- TYPES
|
|
SMALLINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1562: AnalyticEvalNode not properly handling nullable tuple
|
|
select max(t3.c1) from
|
|
(select min(t2.string_col) over () c1
|
|
from alltypesagg t1
|
|
left join alltypes t2 on t2.string_col = t1.string_col ) t3
|
|
---- RESULTS
|
|
'0'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1559: FIRST_VALUE rewrite function intermediate type not matching slot type
|
|
select
|
|
first_value(-32.9) over (order by id rows between 92 preceding and unbounded following),
|
|
first_value(1.1) over (order by id rows between 92 preceding and 1 preceding)
|
|
from alltypestiny
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
-32.9,NULL
|
|
-32.9,1.1
|
|
-32.9,1.1
|
|
-32.9,1.1
|
|
-32.9,1.1
|
|
-32.9,1.1
|
|
-32.9,1.1
|
|
-32.9,1.1
|
|
---- TYPES
|
|
DECIMAL, DECIMAL
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1888: FIRST_VALUE produces incorrect results when the window
|
|
# completely precedes all rows in a partition.
|
|
select id, date_string_col,
|
|
first_value(id) over (partition by date_string_col order by id
|
|
rows between 10 preceding and 3 preceding)
|
|
from alltypes where id < 15 order by id
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,'01/01/09',NULL
|
|
1,'01/01/09',NULL
|
|
2,'01/01/09',NULL
|
|
3,'01/01/09',0
|
|
4,'01/01/09',0
|
|
5,'01/01/09',0
|
|
6,'01/01/09',0
|
|
7,'01/01/09',0
|
|
8,'01/01/09',0
|
|
9,'01/01/09',0
|
|
10,'01/02/09',NULL
|
|
11,'01/02/09',NULL
|
|
12,'01/02/09',NULL
|
|
13,'01/02/09',10
|
|
14,'01/02/09',10
|
|
---- TYPES
|
|
INT, STRING, INT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1519: Check that the first analytic sort of a select block
|
|
# materializes TupleIsNullPredicates to be substituted in ancestor nodes.
|
|
select
|
|
sum(t1.id) over (partition by t1.bool_col),
|
|
count(1) over (order by t1.int_col),
|
|
avg(g) over (order by f),
|
|
t2.a,
|
|
t2.d
|
|
from functional.alltypestiny t1
|
|
left outer join
|
|
(select
|
|
id as a,
|
|
coalesce(id, 10) as b,
|
|
int_col as c,
|
|
coalesce(int_col, 20) as d,
|
|
bigint_col e,
|
|
coalesce(bigint_col, 30) as f,
|
|
coalesce(id + bigint_col, 40) as g
|
|
from functional.alltypestiny) t2
|
|
on (t1.id = t2.a + 100)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
12,4,NULL,NULL,NULL
|
|
12,4,NULL,NULL,NULL
|
|
12,4,NULL,NULL,NULL
|
|
12,4,NULL,NULL,NULL
|
|
16,8,NULL,NULL,NULL
|
|
16,8,NULL,NULL,NULL
|
|
16,8,NULL,NULL,NULL
|
|
16,8,NULL,NULL,NULL
|
|
---- TYPES
|
|
BIGINT, BIGINT, DOUBLE, INT, INT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1519: Check that the first analytic sort of a select block
|
|
# materializes TupleIsNullPredicates to be substituted in ancestor nodes.
|
|
# Same test as the one above, but with an aggregation on top.
|
|
select avg(af1), sum(af3), count(a)
|
|
from
|
|
(select
|
|
sum(t1.id) over (partition by t1.bool_col) af1,
|
|
count(1) over (order by t1.int_col) af2,
|
|
avg(g) over (order by f) af3,
|
|
t2.a,
|
|
t2.d
|
|
from functional.alltypestiny t1
|
|
left outer join
|
|
(select
|
|
id as a,
|
|
coalesce(id, 10) as b,
|
|
int_col as c,
|
|
coalesce(int_col, 20) as d,
|
|
bigint_col e,
|
|
coalesce(bigint_col, 30) as f,
|
|
coalesce(id + bigint_col, 40) as g
|
|
from functional.alltypestiny) t2
|
|
on (t1.id = t2.a + 100)) t3
|
|
group by d
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
14,NULL,0
|
|
---- TYPES
|
|
DOUBLE, DOUBLE, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1519: Check that expr wrapping with a TupleIsNullPredicate
|
|
# is performed correctly with analytics and multiple nesting levels.
|
|
# The test covers these cases:
|
|
# - analytic output that needs to be wrapped
|
|
# - analytic input that needs to be wrapped
|
|
# - both cases above with and without order by in the over() clause
|
|
# - exprs that need to be wrapped multiple times due to multiple outer joins
|
|
select * from
|
|
(select
|
|
a.id,
|
|
sum(x) over (partition by a.id) as x,
|
|
ifnull(y, 10) as y,
|
|
ifnull(z, "b") as z
|
|
from functional.alltypestiny a
|
|
left outer join
|
|
(select id,
|
|
ifnull(int_col, 1) x,
|
|
count(bigint_col) over(partition by id) y,
|
|
ifnull(string_col, "a") z
|
|
from functional.alltypestiny b) v1
|
|
on (a.id = v1.id)) v2
|
|
full outer join
|
|
(select
|
|
c.id,
|
|
sum(x) over (order by c.id) as x,
|
|
ifnull(y, 10) as y,
|
|
ifnull(z, "b") as z
|
|
from functional.alltypestiny c
|
|
left outer join
|
|
(select id,
|
|
ifnull(int_col, 1) x,
|
|
count(bigint_col) over(order by id) y,
|
|
ifnull(string_col, "a") z
|
|
from functional.alltypestiny d) v3
|
|
on (c.id = v3.id)) v4
|
|
on (v2.id = v4.id)
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,0,1,'0',0,0,1,'0'
|
|
1,1,1,'1',1,1,2,'1'
|
|
2,0,1,'0',2,1,3,'0'
|
|
3,1,1,'1',3,2,4,'1'
|
|
4,0,1,'0',4,2,5,'0'
|
|
5,1,1,'1',5,3,6,'1'
|
|
6,0,1,'0',6,3,7,'0'
|
|
7,1,1,'1',7,4,8,'1'
|
|
---- TYPES
|
|
INT, BIGINT, BIGINT, STRING, INT, BIGINT, BIGINT, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1519: Check that expr wrapping with a TupleIsNullPredicate
|
|
# is performed correctly with analytics and multiple nesting levels.
|
|
# This is a tricky case from IMPALA-1518 which has a non-materialized
|
|
# analytic in a WITH-clause.
|
|
with w as
|
|
(select coalesce(-668, -335, count(smallint_col)) as int_col,
|
|
lead('670', 87) over
|
|
(order by coalesce(-668, -335, count(smallint_col)) desc) as char_col
|
|
from functional.alltypes)
|
|
select
|
|
lead(-410, 11) over
|
|
(order by (t2.int_col) + (t2.int_col) desc,
|
|
greatest(coalesce(t2.float_col, 16.51),
|
|
coalesce(t2.int_col, 997)) desc) as int_col,
|
|
(coalesce(177, -593, 723)) + (lag(227, 43) over
|
|
(order by (t2.int_col) + (t2.int_col) desc,
|
|
greatest(coalesce(t2.float_col, 16.51),
|
|
coalesce(t2.int_col, 997)) desc)) as int_col_2,
|
|
(t2.int_col) + (t2.int_col) as int_col_3,
|
|
greatest(coalesce(t2.float_col, 16.51), coalesce(t2.int_col, 997)) as float_col,
|
|
coalesce(t1.int_col, t1.int_col, t2.int_col) as int_col_4
|
|
from w t1
|
|
right outer join
|
|
(select (t1.month) + (t1.year) as int_col,
|
|
least(coalesce(t1.float_col, 683.98), -213.01) as float_col
|
|
from functional.alltypestiny t1
|
|
where (t1.int_col) not in (t1.smallint_col, t1.smallint_col)) t2
|
|
on (t2.int_col = t1.int_col and t2.int_col = t1.int_col and t2.int_col = t1.int_col)
|
|
---- RESULTS
|
|
---- TYPES
|
|
SMALLINT, INT, BIGINT, DOUBLE, BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1946: Check that On-clause predicates of an outer join assigned in a scan
|
|
# are not wrapped in TupleIsNullPredicates.
|
|
select /* +straight_join */ a.id, b.id
|
|
from functional.alltypestiny a
|
|
left outer join
|
|
(select t1.id, ifnull(t1.int_col, 10) as int_col
|
|
from functional.alltypestiny t1
|
|
inner join functional.alltypestiny t2
|
|
on (t1.id = t2.id)) b
|
|
on (a.id = b.id and b.int_col < 10)
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,2
|
|
3,3
|
|
4,4
|
|
5,5
|
|
6,6
|
|
7,7
|
|
---- TYPES
|
|
INT, INT
|
|
====
|
|
---- QUERY
|
|
# Test percent_rank() with multiple partition exprs and multiple order by exprs and
|
|
# compare with the rewrite target query
|
|
select t.year, t.month, t.id, t.pr, (t.r-1)/(t.c-1)
|
|
from (select year, month, id, rank() over (partition by year, month order by id) as r,
|
|
count() over(partition by year, month) as c,
|
|
percent_rank() over(partition by year, month order by id) as pr
|
|
from functional.alltypes where tinyint_col < 2 and month < 3 and (id % 100 < 10)) as t
|
|
order by t.year, t.month, t.id;
|
|
---- RESULTS
|
|
2009,1,0,0,0
|
|
2009,1,1,0.1428571428571428,0.1428571428571428
|
|
2009,1,100,0.2857142857142857,0.2857142857142857
|
|
2009,1,101,0.4285714285714285,0.4285714285714285
|
|
2009,1,200,0.5714285714285714,0.5714285714285714
|
|
2009,1,201,0.7142857142857143,0.7142857142857143
|
|
2009,1,300,0.8571428571428571,0.8571428571428571
|
|
2009,1,301,1,1
|
|
2009,2,400,0,0
|
|
2009,2,401,0.3333333333333333,0.3333333333333333
|
|
2009,2,500,0.6666666666666666,0.6666666666666666
|
|
2009,2,501,1,1
|
|
2010,1,3700,0,0
|
|
2010,1,3701,0.2,0.2
|
|
2010,1,3800,0.4,0.4
|
|
2010,1,3801,0.6,0.6
|
|
2010,1,3900,0.8,0.8
|
|
2010,1,3901,1,1
|
|
2010,2,4000,0,0
|
|
2010,2,4001,0.2,0.2
|
|
2010,2,4100,0.4,0.4
|
|
2010,2,4101,0.6,0.6
|
|
2010,2,4200,0.8,0.8
|
|
2010,2,4201,1,1
|
|
---- TYPES
|
|
INT, INT, INT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Test cume_dist() with multiple partition exprs and multiple order by exprs
|
|
# and compare with the rewrite query
|
|
select t.year, t.month, t.id, t.cd, ((t.c-t.r)+1)/t.c
|
|
from (select year, month, id,
|
|
rank() over (partition by year, month order by id desc) as r,
|
|
count() over(partition by year, month) as c,
|
|
cume_dist() over(partition by year, month order by id) as cd
|
|
from functional.alltypes where tinyint_col < 2 and month < 3 and (id % 100 < 10)) as t
|
|
order by t.year, t.month, t.id;
|
|
---- RESULTS
|
|
2009,1,0,0.125,0.125
|
|
2009,1,1,0.25,0.25
|
|
2009,1,100,0.375,0.375
|
|
2009,1,101,0.5,0.5
|
|
2009,1,200,0.625,0.625
|
|
2009,1,201,0.75,0.75
|
|
2009,1,300,0.875,0.875
|
|
2009,1,301,1,1
|
|
2009,2,400,0.25,0.25
|
|
2009,2,401,0.5,0.5
|
|
2009,2,500,0.75,0.75
|
|
2009,2,501,1,1
|
|
2010,1,3700,0.1666666666666667,0.1666666666666667
|
|
2010,1,3701,0.3333333333333333,0.3333333333333333
|
|
2010,1,3800,0.5,0.5
|
|
2010,1,3801,0.6666666666666666,0.6666666666666666
|
|
2010,1,3900,0.8333333333333334,0.8333333333333334
|
|
2010,1,3901,1,1
|
|
2010,2,4000,0.1666666666666667,0.1666666666666667
|
|
2010,2,4001,0.3333333333333333,0.3333333333333333
|
|
2010,2,4100,0.5,0.5
|
|
2010,2,4101,0.6666666666666666,0.6666666666666666
|
|
2010,2,4200,0.8333333333333334,0.8333333333333334
|
|
2010,2,4201,1,1
|
|
---- TYPES
|
|
INT, INT, INT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Test ntile() with multiple partition exprs and multiple order by exprs and compare
|
|
# with the rewrite query
|
|
select t.year, t.month, t.id, t.nt, floor((if(t.c>3, 3, t.c)*(t.rownum-1))/t.c) + 1
|
|
from (select year, month, id,
|
|
row_number() over (partition by year, month order by id) as rownum,
|
|
count() over(partition by year, month) as c,
|
|
ntile(3) over(partition by year, month order by id) as nt
|
|
from functional.alltypes where tinyint_col < 2 and month < 3 and (id % 100 < 10)) as t
|
|
order by t.year, t.month, t.id;
|
|
---- RESULTS
|
|
2009,1,0,1,1
|
|
2009,1,1,1,1
|
|
2009,1,100,1,1
|
|
2009,1,101,2,2
|
|
2009,1,200,2,2
|
|
2009,1,201,2,2
|
|
2009,1,300,3,3
|
|
2009,1,301,3,3
|
|
2009,2,400,1,1
|
|
2009,2,401,1,1
|
|
2009,2,500,2,2
|
|
2009,2,501,3,3
|
|
2010,1,3700,1,1
|
|
2010,1,3701,1,1
|
|
2010,1,3800,2,2
|
|
2010,1,3801,2,2
|
|
2010,1,3900,3,3
|
|
2010,1,3901,3,3
|
|
2010,2,4000,1,1
|
|
2010,2,4001,1,1
|
|
2010,2,4100,2,2
|
|
2010,2,4101,2,2
|
|
2010,2,4200,3,3
|
|
2010,2,4201,3,3
|
|
---- TYPES
|
|
INT, INT, INT, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test for ntile when the argument is larger than number of rows
|
|
select year, month, id, ntile(1000) over(order by year, month, id) from alltypes where
|
|
tinyint_col < 2 and month < 3 and (id % 100 < 10) order by year, month, id
|
|
---- RESULTS
|
|
2009,1,0,1
|
|
2009,1,1,2
|
|
2009,1,100,3
|
|
2009,1,101,4
|
|
2009,1,200,5
|
|
2009,1,201,6
|
|
2009,1,300,7
|
|
2009,1,301,8
|
|
2009,2,400,9
|
|
2009,2,401,10
|
|
2009,2,500,11
|
|
2009,2,501,12
|
|
2010,1,3700,13
|
|
2010,1,3701,14
|
|
2010,1,3800,15
|
|
2010,1,3801,16
|
|
2010,1,3900,17
|
|
2010,1,3901,18
|
|
2010,2,4000,19
|
|
2010,2,4001,20
|
|
2010,2,4100,21
|
|
2010,2,4101,22
|
|
2010,2,4200,23
|
|
2010,2,4201,24
|
|
---- TYPES
|
|
INT, INT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# percent_rank() and cume_dist() with inline views
|
|
select bool_col, a, b, a + b from
|
|
(select
|
|
bool_col,
|
|
percent_rank() over(order by id) a,
|
|
cume_dist() over(partition by bool_col order by id) b
|
|
from
|
|
(select * from functional.alltypes where id < 20) v1) v2
|
|
order by 1, 2, 3
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
false,0.05263157894736842,0.1,0.1526315789473684
|
|
false,0.1578947368421053,0.2,0.3578947368421053
|
|
false,0.2631578947368421,0.3,0.5631578947368421
|
|
false,0.3684210526315789,0.4,0.7684210526315789
|
|
false,0.4736842105263158,0.5,0.9736842105263157
|
|
false,0.5789473684210527,0.6,1.178947368421053
|
|
false,0.6842105263157895,0.7,1.38421052631579
|
|
false,0.7894736842105263,0.8,1.589473684210526
|
|
false,0.8947368421052632,0.9,1.794736842105263
|
|
false,1,1,2
|
|
true,0,0.1,0.1
|
|
true,0.1052631578947368,0.2,0.3052631578947368
|
|
true,0.2105263157894737,0.3,0.5105263157894737
|
|
true,0.3157894736842105,0.4,0.7157894736842105
|
|
true,0.4210526315789473,0.5,0.9210526315789473
|
|
true,0.5263157894736842,0.6,1.126315789473684
|
|
true,0.631578947368421,0.7,1.331578947368421
|
|
true,0.7368421052631579,0.8,1.536842105263158
|
|
true,0.8421052631578947,0.9,1.742105263157895
|
|
true,0.9473684210526315,1,1.947368421052631
|
|
---- TYPES
|
|
BOOLEAN, DOUBLE, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
select 1 a, cume_dist() over (order by int_col) as cd from alltypes where id < 5
|
|
union
|
|
distinct (select 2, percent_rank() over (partition by bool_col order by int_col)
|
|
from alltypesagg where id >= 5 and id <= 10) order by a, cd;
|
|
---- RESULTS
|
|
1,0.2
|
|
1,0.4
|
|
1,0.6
|
|
1,0.8
|
|
1,1
|
|
2,0
|
|
2,0.333333333333333
|
|
2,0.5
|
|
2,0.666666666666666
|
|
2,1
|
|
---- TYPES
|
|
TINYINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Test analytic function rewrite when the same expr appears twice
|
|
select id, percent_rank() over(order by id) from functional.alltypestiny
|
|
order by percent_rank() over (order by id)
|
|
---- RESULTS
|
|
0,0
|
|
1,0.1428571428571428
|
|
2,0.2857142857142857
|
|
3,0.4285714285714285
|
|
4,0.5714285714285714
|
|
5,0.7142857142857143
|
|
6,0.8571428571428571
|
|
7,1
|
|
---- TYPES
|
|
INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Tests IMPALA-2158. Check that analytic functions with a limit succeed.
|
|
select id, rank() over(order by id) from functional.alltypesagg limit 10
|
|
---- RESULTS
|
|
0,1
|
|
0,1
|
|
1,3
|
|
2,4
|
|
3,5
|
|
4,6
|
|
5,7
|
|
6,8
|
|
7,9
|
|
8,10
|
|
---- TYPES
|
|
INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-2829: MemPool mem transfer to the output rowbatch
|
|
# may result in a crash when the mem transfer occurs right before eos. This
|
|
# repro was crafted to hit the crash by playing with the window and row sizes
|
|
# so that memory allocated within the analytic node is just large enough (>8mb)
|
|
# to be transfered to the output row batch immediately before eos.
|
|
select max(t3.c1), max(t3.c2)
|
|
from (
|
|
select
|
|
avg( t1.timestamp_col )
|
|
over (order by t1.id, t2.id rows between 5000 following and 50000 following) c1,
|
|
avg( t2.timestamp_col )
|
|
over (order by t1.id, t2.id rows between 5000 following and 50000 following) c2
|
|
from alltypesagg t1 join alltypesagg t2 where t1.int_col = t2.int_col
|
|
) t3
|
|
---- RESULTS
|
|
2010-01-10 18:02:05.234931468,2010-01-10 18:02:05.215156078
|
|
---- TYPES
|
|
TIMESTAMP, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2457 - PERCENT_RANK() returns NaN for row group with 1 row.
|
|
select id, rank() over (partition by id order by tinyint_col),
|
|
count() over (partition by id), percent_rank() over (partition by id order by tinyint_col)
|
|
from functional.alltypestiny order by id
|
|
---- RESULTS
|
|
0,1,1,0
|
|
1,1,1,0
|
|
2,1,1,0
|
|
3,1,1,0
|
|
4,1,1,0
|
|
5,1,1,0
|
|
6,1,1,0
|
|
7,1,1,0
|
|
---- TYPES
|
|
int, bigint, bigint, double
|
|
====
|
|
---- QUERY
|
|
# Check that the a large analytic query can succeed with the minimum buffers (3 buffers
|
|
# for sort, 2 buffer for analytic).
|
|
SET default_spillable_buffer_size=8m;
|
|
SET buffer_pool_limit=40m;
|
|
SELECT lag(-180, 13) over (ORDER BY t1.int_col ASC, t2.int_col ASC) AS int_col
|
|
FROM functional_parquet.alltypes t1 CROSS JOIN functional_parquet.alltypes t2 LIMIT 10;
|
|
---- TYPES
|
|
SMALLINT
|
|
---- RESULTS
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
NULL
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2906: Tests that an analytic sort does not attempt to materialize
|
|
# a constant expr wrapped in a TupleIsNullPredicate that is not executable
|
|
# at the analytic sort. Here 'y' is wrapped in a TupleIsNullPredicate, and
|
|
# the analytic sort of the top-level block should not attempt to materialize
|
|
# that wrapped constant expr.
|
|
SELECT
|
|
LEAD(996, 13) OVER (ORDER BY t1.int_col)
|
|
FROM functional.alltypestiny t1
|
|
LEFT JOIN (
|
|
SELECT
|
|
v1.x,
|
|
(-560) IN (694, 44) AS y
|
|
FROM (
|
|
SELECT
|
|
LAG(bigint_col, 34) OVER (ORDER BY t2.int_col) AS x
|
|
FROM functional.alltypestiny t2
|
|
) v1
|
|
) v2
|
|
ON v2.x = t1.bigint_col AND v2.y = t1.bigint_col
|
|
GROUP BY t1.int_col
|
|
---- TYPES
|
|
SMALLINT
|
|
---- RESULTS
|
|
NULL
|
|
NULL
|
|
====
|
|
---- QUERY
|
|
# Start IGNORE NULLS
|
|
# Test all possible combinations of bound types for last_value that don't get rewritten.
|
|
select id, tinyint_col,
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between unbounded preceding and 1 preceding),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between unbounded preceding and current row),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between unbounded preceding and 1 following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between unbounded preceding and unbounded following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 2 preceding and 1 preceding),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 2 preceding and current row),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 1 preceding and 2 following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between current row and current row),
|
|
last_value(cast(tinyint_col as string) ignore nulls) over
|
|
(order by id rows between current row and 1 following),
|
|
last_value(cast(tinyint_col as string) ignore nulls) over
|
|
(order by id rows between 1 following and 2 following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id range between unbounded preceding and current row),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id range between unbounded preceding and unbounded following)
|
|
from functional.alltypesagg where id < 21
|
|
---- TYPES
|
|
INT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, STRING, STRING, TINYINT, TINYINT
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,NULL,NULL,NULL,NULL,9,NULL,NULL,1,NULL,'NULL','1',NULL,9
|
|
0,NULL,NULL,NULL,1,9,NULL,NULL,2,NULL,'1','2',NULL,9
|
|
1,1,NULL,1,2,9,NULL,1,3,1,'2','3',1,9
|
|
2,2,1,2,3,9,1,2,4,2,'3','4',2,9
|
|
3,3,2,3,4,9,2,3,5,3,'4','5',3,9
|
|
4,4,3,4,5,9,3,4,6,4,'5','6',4,9
|
|
5,5,4,5,6,9,4,5,7,5,'6','7',5,9
|
|
6,6,5,6,7,9,5,6,8,6,'7','8',6,9
|
|
7,7,6,7,8,9,6,7,9,7,'8','9',7,9
|
|
8,8,7,8,9,9,7,8,9,8,'9','9',8,9
|
|
9,9,8,9,9,9,8,9,9,9,'9','NULL',9,9
|
|
10,NULL,9,9,9,9,9,9,1,NULL,'NULL','1',9,9
|
|
10,NULL,9,9,1,9,9,9,2,NULL,'1','2',9,9
|
|
11,1,9,1,2,9,NULL,1,3,1,'2','3',1,9
|
|
12,2,1,2,3,9,1,2,4,2,'3','4',2,9
|
|
13,3,2,3,4,9,2,3,5,3,'4','5',3,9
|
|
14,4,3,4,5,9,3,4,6,4,'5','6',4,9
|
|
15,5,4,5,6,9,4,5,7,5,'6','7',5,9
|
|
16,6,5,6,7,9,5,6,8,6,'7','8',6,9
|
|
17,7,6,7,8,9,6,7,9,7,'8','9',7,9
|
|
18,8,7,8,9,9,7,8,9,8,'9','9',8,9
|
|
19,9,8,9,9,9,8,9,9,9,'9','NULL',9,9
|
|
20,NULL,9,9,9,9,9,9,9,NULL,'NULL','NULL',9,9
|
|
20,NULL,9,9,9,9,9,9,NULL,NULL,'NULL','NULL',9,9
|
|
====
|
|
---- QUERY
|
|
# Test the remaining combinations of bound types. These are handled separately from above
|
|
# because they are rewritten as first_value() functions, so their results can be returned
|
|
# in a different order, since there isn't a total ordering on id.
|
|
select id, tinyint_col,
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 1 preceding and unbounded following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between current row and unbounded following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 2 following and unbounded following),
|
|
last_value(tinyint_col ignore nulls) over
|
|
(order by id range between current row and unbounded following)
|
|
from functional.alltypesagg where id < 21 order by id
|
|
---- TYPES
|
|
INT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,NULL,9,9,9,9
|
|
0,NULL,9,9,9,9
|
|
1,1,9,9,9,9
|
|
2,2,9,9,9,9
|
|
3,3,9,9,9,9
|
|
4,4,9,9,9,9
|
|
5,5,9,9,9,9
|
|
6,6,9,9,9,9
|
|
7,7,9,9,9,9
|
|
8,8,9,9,9,9
|
|
9,9,9,9,9,9
|
|
10,NULL,9,9,9,9
|
|
10,NULL,9,9,9,9
|
|
11,1,9,9,9,9
|
|
12,2,9,9,9,9
|
|
13,3,9,9,9,9
|
|
14,4,9,9,9,9
|
|
15,5,9,9,9,9
|
|
16,6,9,9,9,9
|
|
17,7,9,9,9,9
|
|
18,8,9,9,NULL,9
|
|
19,9,9,9,NULL,9
|
|
20,NULL,9,NULL,NULL,NULL
|
|
20,NULL,NULL,NULL,NULL,NULL
|
|
====
|
|
---- QUERY
|
|
# Test all combinations of bound types for first_value that don't get rewritten.
|
|
select id, tinyint_col,
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between unbounded preceding and 1 preceding),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between unbounded preceding and current row),
|
|
first_value(cast(tinyint_col as string) ignore nulls) over
|
|
(order by id rows between unbounded preceding and 1 following),
|
|
first_value(cast(tinyint_col as string) ignore nulls) over
|
|
(order by id rows between unbounded preceding and unbounded following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id range between unbounded preceding and current row),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id range between unbounded preceding and unbounded following)
|
|
from functional.alltypesagg where id < 21
|
|
---- TYPES
|
|
INT, TINYINT, TINYINT, TINYINT, STRING, STRING, TINYINT, TINYINT
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,NULL,NULL,NULL,'NULL','1',NULL,1
|
|
0,NULL,NULL,NULL,'1','1',NULL,1
|
|
1,1,NULL,1,'1','1',1,1
|
|
2,2,1,1,'1','1',1,1
|
|
3,3,1,1,'1','1',1,1
|
|
4,4,1,1,'1','1',1,1
|
|
5,5,1,1,'1','1',1,1
|
|
6,6,1,1,'1','1',1,1
|
|
7,7,1,1,'1','1',1,1
|
|
8,8,1,1,'1','1',1,1
|
|
9,9,1,1,'1','1',1,1
|
|
10,NULL,1,1,'1','1',1,1
|
|
10,NULL,1,1,'1','1',1,1
|
|
11,1,1,1,'1','1',1,1
|
|
12,2,1,1,'1','1',1,1
|
|
13,3,1,1,'1','1',1,1
|
|
14,4,1,1,'1','1',1,1
|
|
15,5,1,1,'1','1',1,1
|
|
16,6,1,1,'1','1',1,1
|
|
17,7,1,1,'1','1',1,1
|
|
18,8,1,1,'1','1',1,1
|
|
19,9,1,1,'1','1',1,1
|
|
20,NULL,1,1,'1','1',1,1
|
|
20,NULL,1,1,'1','1',1,1
|
|
====
|
|
---- QUERY
|
|
# Test combinations of bound types for first_value that get rewritten as last_value.
|
|
select id, tinyint_col,
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 2 preceding and 1 preceding),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 1 preceding and current row),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 1 preceding and 1 following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 1 preceding and unbounded following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between current row and current row),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between current row and 1 following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between current row and unbounded following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 1 following and 2 following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id rows between 2 following and unbounded following),
|
|
first_value(tinyint_col ignore nulls) over
|
|
(order by id range between current row and unbounded following)
|
|
from functional.alltypesagg where id < 21
|
|
---- TYPES
|
|
INT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,NULL,NULL,NULL,NULL,1,NULL,NULL,1,1,1,1
|
|
0,NULL,NULL,NULL,1,1,NULL,1,1,1,2,1
|
|
1,1,NULL,1,1,1,1,1,1,2,3,1
|
|
2,2,1,1,1,1,2,2,2,3,4,2
|
|
3,3,1,2,2,2,3,3,3,4,5,3
|
|
4,4,2,3,3,3,4,4,4,5,6,4
|
|
5,5,3,4,4,4,5,5,5,6,7,5
|
|
6,6,4,5,5,5,6,6,6,7,8,6
|
|
7,7,5,6,6,6,7,7,7,8,9,7
|
|
8,8,6,7,7,7,8,8,8,9,1,8
|
|
9,9,7,8,8,8,9,9,9,NULL,1,9
|
|
10,NULL,8,9,9,9,NULL,NULL,1,1,1,1
|
|
10,NULL,9,NULL,1,1,NULL,1,1,1,2,1
|
|
11,1,NULL,1,1,1,1,1,1,2,3,1
|
|
12,2,1,1,1,1,2,2,2,3,4,2
|
|
13,3,1,2,2,2,3,3,3,4,5,3
|
|
14,4,2,3,3,3,4,4,4,5,6,4
|
|
15,5,3,4,4,4,5,5,5,6,7,5
|
|
16,6,4,5,5,5,6,6,6,7,8,6
|
|
17,7,5,6,6,6,7,7,7,8,9,7
|
|
18,8,6,7,7,7,8,8,8,9,NULL,8
|
|
19,9,7,8,8,8,9,9,9,NULL,NULL,9
|
|
20,NULL,8,9,9,9,NULL,NULL,NULL,NULL,NULL,NULL
|
|
20,NULL,9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL
|
|
====
|
|
---- QUERY
|
|
# Test all possible types for last_value (except decimal).
|
|
select id,
|
|
last_value(bool_col ignore nulls) over (order by id),
|
|
last_value(tinyint_col ignore nulls) over (order by id),
|
|
last_value(smallint_col ignore nulls) over (order by id),
|
|
last_value(int_col ignore nulls) over (order by id),
|
|
last_value(bigint_col ignore nulls) over (order by id),
|
|
last_value(float_col ignore nulls) over (order by id),
|
|
last_value(double_col ignore nulls) over (order by id),
|
|
last_value(string_col ignore nulls) over (order by id),
|
|
last_value(timestamp_col ignore nulls) over (order by id)
|
|
from functional.alltypesagg where id < 5
|
|
---- TYPES
|
|
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, TIMESTAMP
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
|
|
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
|
|
1,false,1,1,1,10,1.100000023841858,10.1,'1',2010-01-01 00:01:00
|
|
2,true,2,2,2,20,2.200000047683716,20.2,'2',2010-01-01 00:02:00.100000000
|
|
3,false,3,3,3,30,3.299999952316284,30.3,'3',2010-01-01 00:03:00.300000000
|
|
4,true,4,4,4,40,4.400000095367432,40.4,'4',2010-01-01 00:04:00.600000000
|
|
====
|
|
---- QUERY
|
|
# Test all possible types for first_value (except decimal).
|
|
select id,
|
|
first_value(bool_col ignore nulls) over (order by id),
|
|
first_value(tinyint_col ignore nulls) over (order by id),
|
|
first_value(smallint_col ignore nulls) over (order by id),
|
|
first_value(int_col ignore nulls) over (order by id),
|
|
first_value(bigint_col ignore nulls) over (order by id),
|
|
first_value(float_col ignore nulls) over (order by id),
|
|
first_value(double_col ignore nulls) over (order by id),
|
|
first_value(string_col ignore nulls) over (order by id),
|
|
first_value(timestamp_col ignore nulls) over (order by id)
|
|
from functional.alltypesagg where id < 5
|
|
---- TYPES
|
|
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, TIMESTAMP
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
|
|
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
|
|
1,true,1,1,1,10,1.100000023841858,10.1,'0',2010-01-01 00:00:00
|
|
2,true,1,1,1,10,1.100000023841858,10.1,'0',2010-01-01 00:00:00
|
|
3,true,1,1,1,10,1.100000023841858,10.1,'0',2010-01-01 00:00:00
|
|
4,true,1,1,1,10,1.100000023841858,10.1,'0',2010-01-01 00:00:00
|
|
====
|
|
---- QUERY
|
|
select
|
|
last_value(d1 ignore nulls) over (order by d1),
|
|
first_value(d1 ignore nulls) over (order by d1)
|
|
from functional.decimal_tbl
|
|
---- TYPES
|
|
DECIMAL, DECIMAL
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
1234,1234
|
|
2345,1234
|
|
12345,1234
|
|
12345,1234
|
|
132842,1234
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-4120: Invoke UDFs which do local allocations and verifies
|
|
# that the results are copied out.
|
|
select count(*) from (
|
|
select
|
|
from_unixtime(lead(bigint_col, 1) over (order by id), 'yyyyMMddHH:mm:ss') as a,
|
|
lead(from_unixtime(bigint_col, 'yyyyMMddHH:mm:ss'), 1) over (order by id) AS b
|
|
from functional.alltypes) x
|
|
where x.a = x.b
|
|
---- TYPES
|
|
BIGINT
|
|
---- ERRORS
|
|
---- RESULTS
|
|
7299
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-5623: lag() should free UDF allocated memory.
|
|
# i.e. the fix for IMPALA-4120 should apply to lag() as well.
|
|
select count(*) from (
|
|
select
|
|
from_unixtime(lag(bigint_col, 1) over (order by id), 'yyyyMMddHH:mm:ss') as a,
|
|
lag(from_unixtime(bigint_col, 'yyyyMMddHH:mm:ss'), 1) over (order by id) AS b
|
|
from functional.alltypes) x
|
|
where x.a = x.b
|
|
---- TYPES
|
|
BIGINT
|
|
---- ERRORS
|
|
---- RESULTS
|
|
7299
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-4518: Distinguishes between empty string and null string.
|
|
select f,lead(b,1,null) over (order by f)
|
|
from (select * from nulltable union all select * from nulltable) x;
|
|
---- TYPES
|
|
STRING, STRING
|
|
---- RESULTS
|
|
'a\x00b',''
|
|
'a\x00b','NULL'
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-5657: Ignore nulls should be applied correctly on views.
|
|
# Without the fix for IMPALA-5657, the select * query on the view does not
|
|
# apply ignore nulls and returns a few NULLs in the query result.
|
|
create view if not exists imp5657_view as
|
|
select
|
|
last_value(tinyint_col ignore nulls) over (order by tinyint_col)
|
|
from functional.alltypesagg
|
|
where id < 5;
|
|
====
|
|
---- QUERY
|
|
select * from imp5657_view;
|
|
---- TYPES
|
|
TINYINT
|
|
---- RESULTS
|
|
1
|
|
2
|
|
3
|
|
4
|
|
4
|
|
4
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-1882: first_value works without order by clause. Therefore, output of
|
|
# function depends on order of input rows, which in this case is DESC
|
|
select id, bool_col, first_value(id) over (partition by bool_col) from
|
|
(select * from functional.alltypessmall order by id desc limit 5) as t1
|
|
---- TYPES
|
|
INT, BOOLEAN, INT
|
|
---- RESULTS
|
|
99,false,99
|
|
97,false,99
|
|
95,false,99
|
|
98,true,98
|
|
96,true,98
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-1882: first_value works without order by clause. Therefore, output of
|
|
# function depends on order of input rows, which in this case is ASC
|
|
select id, bool_col, first_value(id) over (partition by bool_col) from
|
|
(select * from functional.alltypessmall order by id asc limit 5) as t1
|
|
---- TYPES
|
|
INT, BOOLEAN, INT
|
|
---- RESULTS
|
|
1,false,1
|
|
3,false,1
|
|
0,true,0
|
|
2,true,0
|
|
4,true,0
|
|
---- QUERY
|
|
# Test for IMPALA-1882: last_value works without order by clause. Therefore, output of
|
|
# function depends on order of input rows, which in this case is DESC
|
|
select id, bool_col, last_value(id) over (partition by bool_col) from
|
|
(select * from functional.alltypessmall order by id desc limit 5) as t1
|
|
---- TYPES
|
|
INT, BOOLEAN, INT
|
|
---- RESULTS
|
|
99,false,95
|
|
97,false,95
|
|
95,false,95
|
|
98,true,96
|
|
96,true,96
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-1882: last_value works without order by clause. Therefore, output of
|
|
# function depends on order of input rows, which in this case is ASC
|
|
select id, bool_col, last_value(id) over (partition by bool_col) from
|
|
(select * from functional.alltypessmall order by id asc limit 5) as t1
|
|
---- TYPES
|
|
INT, BOOLEAN, INT
|
|
---- RESULTS
|
|
1,false,3
|
|
3,false,3
|
|
0,true,4
|
|
2,true,4
|
|
4,true,4
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-1882: first_value works with ignore nulls and without order by clause.
|
|
# Therefore, output of function depends on order of input rows, which in this case is ASC
|
|
select bool_col, smallint_col, first_value(smallint_col ignore nulls) over
|
|
(partition by bool_col) from (select * from functional.alltypesagg where
|
|
id > 99 order by id asc limit 10) as t1
|
|
---- TYPES
|
|
BOOLEAN, SMALLINT, SMALLINT
|
|
---- RESULTS
|
|
false,1,1
|
|
false,3,1
|
|
false,5,1
|
|
false,7,1
|
|
true,NULL,2
|
|
true,NULL,2
|
|
true,2,2
|
|
true,4,2
|
|
true,6,2
|
|
true,8,2
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-1882: first_value works with ignore nulls and without order by clause.
|
|
# Therefore, output of function depends on order of input rows, which in this case is DESC
|
|
select bool_col, smallint_col, first_value(smallint_col ignore nulls) over
|
|
(partition by bool_col) from (select * from functional.alltypesagg where
|
|
id < 101 order by id desc limit 10) as t1;
|
|
---- TYPES
|
|
BOOLEAN, SMALLINT, SMALLINT
|
|
---- RESULTS
|
|
false,99,99
|
|
false,97,99
|
|
false,95,99
|
|
false,93,99
|
|
true,NULL,98
|
|
true,NULL,98
|
|
true,98,98
|
|
true,96,98
|
|
true,94,98
|
|
true,92,98
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-1882: last_value works with ignore nulls and without order by clause.
|
|
# Therefore, output of function depends on order of input rows, which in this case is ASC
|
|
select bool_col, smallint_col, last_value(smallint_col ignore nulls) over
|
|
(partition by bool_col) from (select * from functional.alltypesagg where
|
|
id > 99 order by id asc limit 10) as t1;
|
|
---- TYPES
|
|
BOOLEAN, SMALLINT, SMALLINT
|
|
---- RESULTS
|
|
false,1,7
|
|
false,3,7
|
|
false,5,7
|
|
false,7,7
|
|
true,NULL,8
|
|
true,NULL,8
|
|
true,2,8
|
|
true,4,8
|
|
true,6,8
|
|
true,8,8
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-1882: last_value works with ignore nulls and without order by clause.
|
|
# Therefore, output of function depends on order of input rows, which in this case is DESC
|
|
select bool_col, smallint_col, last_value(smallint_col ignore nulls) over
|
|
(partition by bool_col) from (select * from functional.alltypesagg where
|
|
id < 110 order by id desc limit 10) as t1;
|
|
---- TYPES
|
|
BOOLEAN, SMALLINT, SMALLINT
|
|
---- RESULTS
|
|
false,9,1
|
|
false,7,1
|
|
false,5,1
|
|
false,3,1
|
|
false,1,1
|
|
true,8,2
|
|
true,6,2
|
|
true,4,2
|
|
true,2,2
|
|
true,NULL,2
|