mirror of
https://github.com/apache/impala.git
synced 2026-01-09 15:00:11 -05:00
Implements grouping() and grouping_id() builtins. grouping_id() has both a no-arg version, which returns a bit vector of all grouping exprs and a varargs version, which returns a bit vector of the provided arguments. Grouping is a keyword, so needs special handling in the parser to be accepted as a function name. These functions are implemented in the transpose agg with a CASE expression similar to other aggregate functions, but returning the grouping() or grouping_id() value for that aggregation class instead of an aggregated value. Testing: * Added parser test for grouping keyword. * Added analysis tests for the functions. * Added basic planner test to show expressions generated * Added some TPC-DS queries that use grouping() - queries 80, 70 and 86 using reference .test files from Fang-Yu Rao. 27 and 36 were added with reference results from https://github.com/cwida/tpcds-result-reproduction * Add targeted end-to-end tests. * Added view compatibility test with Hive. Change-Id: If0b1640d606256c0fe9204d2a21a8f6d06abcdb6 Reviewed-on: http://gerrit.cloudera.org:8080/16140 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Tim Armstrong <tarmstrong@cloudera.com>
328 lines
6.8 KiB
Plaintext
328 lines
6.8 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Basic ROLLUP.
|
|
select int_col, string_col, bool_col, count(*)
|
|
from alltypesagg
|
|
group by rollup(1, 2, 3)
|
|
order by count(*) desc, 1, 2, 3
|
|
limit 20
|
|
---- RESULTS
|
|
NULL,'NULL',NULL,11000
|
|
10,'10',true,20
|
|
10,'10',NULL,20
|
|
10,'NULL',NULL,20
|
|
20,'20',true,20
|
|
20,'20',NULL,20
|
|
20,'NULL',NULL,20
|
|
30,'30',true,20
|
|
30,'30',NULL,20
|
|
30,'NULL',NULL,20
|
|
40,'40',true,20
|
|
40,'40',NULL,20
|
|
40,'NULL',NULL,20
|
|
50,'50',true,20
|
|
50,'50',NULL,20
|
|
50,'NULL',NULL,20
|
|
60,'60',true,20
|
|
60,'60',NULL,20
|
|
60,'NULL',NULL,20
|
|
70,'70',true,20
|
|
---- TYPES
|
|
INT,STRING,BOOLEAN,BIGINT
|
|
====
|
|
---- QUERY
|
|
# Basic CUBE
|
|
select int_col, string_col, bool_col, count(*)
|
|
from alltypesagg
|
|
group by cube(1, 2, 3)
|
|
order by count(*) desc, 1, 2, 3
|
|
limit 20
|
|
---- RESULTS
|
|
NULL,'NULL',NULL,11000
|
|
NULL,'NULL',true,6000
|
|
NULL,'NULL',false,5000
|
|
10,'10',true,20
|
|
10,'10',NULL,20
|
|
10,'NULL',true,20
|
|
10,'NULL',NULL,20
|
|
20,'20',true,20
|
|
20,'20',NULL,20
|
|
20,'NULL',true,20
|
|
20,'NULL',NULL,20
|
|
30,'30',true,20
|
|
30,'30',NULL,20
|
|
30,'NULL',true,20
|
|
30,'NULL',NULL,20
|
|
40,'40',true,20
|
|
40,'40',NULL,20
|
|
40,'NULL',true,20
|
|
40,'NULL',NULL,20
|
|
50,'50',true,20
|
|
---- TYPES
|
|
INT,STRING,BOOLEAN,BIGINT
|
|
====
|
|
---- QUERY
|
|
# Basic GROUPING SETS
|
|
select int_col, string_col, bool_col, count(*)
|
|
from alltypesagg
|
|
group by grouping sets((int_col), (string_col), (int_col, bool_col))
|
|
order by count(*) desc, 1, 2, 3
|
|
limit 20
|
|
---- RESULTS
|
|
10,'NULL',true,20
|
|
10,'NULL',NULL,20
|
|
20,'NULL',true,20
|
|
20,'NULL',NULL,20
|
|
30,'NULL',true,20
|
|
30,'NULL',NULL,20
|
|
40,'NULL',true,20
|
|
40,'NULL',NULL,20
|
|
50,'NULL',true,20
|
|
50,'NULL',NULL,20
|
|
60,'NULL',true,20
|
|
60,'NULL',NULL,20
|
|
70,'NULL',true,20
|
|
70,'NULL',NULL,20
|
|
80,'NULL',true,20
|
|
80,'NULL',NULL,20
|
|
90,'NULL',true,20
|
|
90,'NULL',NULL,20
|
|
100,'NULL',true,20
|
|
100,'NULL',NULL,20
|
|
---- TYPES
|
|
INT,STRING,BOOLEAN,BIGINT
|
|
====
|
|
---- QUERY
|
|
# Output should contain a row for both the NULL value in
|
|
# int_col and the rollup of all values in int_col.
|
|
select year, int_col, count(*)
|
|
from alltypesagg
|
|
group by rollup(year, int_col)
|
|
order by 2 desc, 1, 3
|
|
limit 10
|
|
---- RESULTS
|
|
2010,NULL,20
|
|
2010,NULL,11000
|
|
NULL,NULL,11000
|
|
2010,999,10
|
|
2010,998,10
|
|
2010,997,10
|
|
2010,996,10
|
|
2010,995,10
|
|
2010,994,10
|
|
2010,993,10
|
|
---- TYPES
|
|
INT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Output should contain a row for both the NULL value in
|
|
# int_col and the rollup of all values in int_col.
|
|
select year, int_col, count(*)
|
|
from alltypesagg
|
|
group by rollup(year, int_col)
|
|
having int_col is NULL
|
|
---- RESULTS
|
|
NULL,NULL,11000
|
|
2010,NULL,20
|
|
2010,NULL,11000
|
|
---- TYPES
|
|
INT, INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Correlated subquery with rollup. Results are the same as
|
|
# if rollup was not present.
|
|
select id
|
|
from functional.alltypesagg a
|
|
where exists
|
|
(select id
|
|
from functional.alltypestiny b
|
|
where a.tinyint_col = b.tinyint_col and a.string_col = b.string_col
|
|
group by rollup(id, int_col, bool_col))
|
|
and tinyint_col < 10
|
|
---- RESULTS
|
|
1
|
|
1001
|
|
2001
|
|
3001
|
|
4001
|
|
5001
|
|
6001
|
|
7001
|
|
8001
|
|
9001
|
|
---- TYPES
|
|
INT
|
|
====
|
|
---- QUERY
|
|
# Test grouping() and grouping_id() on a table with null values.
|
|
select
|
|
grouping_id(),
|
|
grouping(int_col % 2),
|
|
grouping(substring(string_col, 1, 1)),
|
|
grouping(bool_col),
|
|
int_col % 2 i,
|
|
substring(string_col, 1, 1) str,
|
|
bool_col,
|
|
count(*)
|
|
from alltypesagg
|
|
group by cube(i, str, bool_col)
|
|
order by grouping_id() desc, i, str, bool_col
|
|
---- RESULTS
|
|
7,1,1,1,NULL,'NULL',NULL,11000
|
|
6,1,1,0,NULL,'NULL',false,5000
|
|
6,1,1,0,NULL,'NULL',true,6000
|
|
5,1,0,1,NULL,'0',NULL,20
|
|
5,1,0,1,NULL,'1',NULL,1220
|
|
5,1,0,1,NULL,'2',NULL,1220
|
|
5,1,0,1,NULL,'3',NULL,1220
|
|
5,1,0,1,NULL,'4',NULL,1220
|
|
5,1,0,1,NULL,'5',NULL,1220
|
|
5,1,0,1,NULL,'6',NULL,1220
|
|
5,1,0,1,NULL,'7',NULL,1220
|
|
5,1,0,1,NULL,'8',NULL,1220
|
|
5,1,0,1,NULL,'9',NULL,1220
|
|
4,1,0,0,NULL,'0',true,20
|
|
4,1,0,0,NULL,'1',false,560
|
|
4,1,0,0,NULL,'1',true,660
|
|
4,1,0,0,NULL,'2',false,550
|
|
4,1,0,0,NULL,'2',true,670
|
|
4,1,0,0,NULL,'3',false,560
|
|
4,1,0,0,NULL,'3',true,660
|
|
4,1,0,0,NULL,'4',false,550
|
|
4,1,0,0,NULL,'4',true,670
|
|
4,1,0,0,NULL,'5',false,560
|
|
4,1,0,0,NULL,'5',true,660
|
|
4,1,0,0,NULL,'6',false,550
|
|
4,1,0,0,NULL,'6',true,670
|
|
4,1,0,0,NULL,'7',false,560
|
|
4,1,0,0,NULL,'7',true,660
|
|
4,1,0,0,NULL,'8',false,550
|
|
4,1,0,0,NULL,'8',true,670
|
|
4,1,0,0,NULL,'9',false,560
|
|
4,1,0,0,NULL,'9',true,660
|
|
3,0,1,1,0,'NULL',NULL,5980
|
|
3,0,1,1,1,'NULL',NULL,5000
|
|
3,0,1,1,NULL,'NULL',NULL,20
|
|
2,0,1,0,0,'NULL',true,5980
|
|
2,0,1,0,1,'NULL',false,5000
|
|
2,0,1,0,NULL,'NULL',true,20
|
|
1,0,0,1,0,'1',NULL,660
|
|
1,0,0,1,0,'2',NULL,670
|
|
1,0,0,1,0,'3',NULL,660
|
|
1,0,0,1,0,'4',NULL,670
|
|
1,0,0,1,0,'5',NULL,660
|
|
1,0,0,1,0,'6',NULL,670
|
|
1,0,0,1,0,'7',NULL,660
|
|
1,0,0,1,0,'8',NULL,670
|
|
1,0,0,1,0,'9',NULL,660
|
|
1,0,0,1,1,'1',NULL,560
|
|
1,0,0,1,1,'2',NULL,550
|
|
1,0,0,1,1,'3',NULL,560
|
|
1,0,0,1,1,'4',NULL,550
|
|
1,0,0,1,1,'5',NULL,560
|
|
1,0,0,1,1,'6',NULL,550
|
|
1,0,0,1,1,'7',NULL,560
|
|
1,0,0,1,1,'8',NULL,550
|
|
1,0,0,1,1,'9',NULL,560
|
|
1,0,0,1,NULL,'0',NULL,20
|
|
0,0,0,0,0,'1',true,660
|
|
0,0,0,0,0,'2',true,670
|
|
0,0,0,0,0,'3',true,660
|
|
0,0,0,0,0,'4',true,670
|
|
0,0,0,0,0,'5',true,660
|
|
0,0,0,0,0,'6',true,670
|
|
0,0,0,0,0,'7',true,660
|
|
0,0,0,0,0,'8',true,670
|
|
0,0,0,0,0,'9',true,660
|
|
0,0,0,0,1,'1',false,560
|
|
0,0,0,0,1,'2',false,550
|
|
0,0,0,0,1,'3',false,560
|
|
0,0,0,0,1,'4',false,550
|
|
0,0,0,0,1,'5',false,560
|
|
0,0,0,0,1,'6',false,550
|
|
0,0,0,0,1,'7',false,560
|
|
0,0,0,0,1,'8',false,550
|
|
0,0,0,0,1,'9',false,560
|
|
0,0,0,0,NULL,'0',true,20
|
|
---- TYPES
|
|
BIGINT, TINYINT, TINYINT, TINYINT, INT, STRING, BOOLEAN, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test grouping_id() with different argument list order and length
|
|
select
|
|
grouping_id(bool_col, string_col, int_col),
|
|
grouping_id(bool_col),
|
|
int_col,
|
|
string_col,
|
|
bool_col,
|
|
count(*)
|
|
from alltypestiny
|
|
group by rollup(int_col, string_col, bool_col)
|
|
---- RESULTS
|
|
6,1,0,'NULL',NULL,4
|
|
0,0,1,'1',false,4
|
|
7,1,NULL,'NULL',NULL,8
|
|
4,1,0,'0',NULL,4
|
|
4,1,1,'1',NULL,4
|
|
0,0,0,'0',true,4
|
|
6,1,1,'NULL',NULL,4
|
|
---- TYPES
|
|
BIGINT, BIGINT, INT, STRING, BOOLEAN, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test grouping() and grouping_id() with a single grouping set.
|
|
select
|
|
grouping_id(int_col, string_col),
|
|
grouping(int_col),
|
|
grouping(string_col),
|
|
int_col,
|
|
string_col,
|
|
count(*)
|
|
from alltypestiny
|
|
group by grouping sets((int_col, string_col))
|
|
---- RESULTS
|
|
0,0,0,1,'1',4
|
|
0,0,0,0,'0',4
|
|
---- TYPES
|
|
BIGINT, TINYINT, TINYINT, INT, STRING, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test grouping() and grouping_id() with plain GROUP BY, where they always
|
|
# return 0.
|
|
select
|
|
grouping_id(int_col, string_col),
|
|
grouping_id(),
|
|
grouping(int_col),
|
|
grouping(string_col),
|
|
int_col,
|
|
string_col,
|
|
count(*)
|
|
from alltypestiny
|
|
group by int_col, string_col
|
|
---- RESULTS
|
|
0,0,0,0,1,'1',4
|
|
0,0,0,0,0,'0',4
|
|
---- TYPES
|
|
BIGINT, BIGINT, TINYINT, TINYINT, INT, STRING, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Test grouping() and grouping_id() with plain GROUP BY and multiple aggregation
|
|
# classes from the distinct aggregate functions. They always return 0 in this
|
|
# case.
|
|
select
|
|
grouping_id(int_col, string_col),
|
|
grouping(int_col),
|
|
grouping(string_col),
|
|
int_col,
|
|
string_col,
|
|
count(distinct tinyint_col),
|
|
count(distinct timestamp_col)
|
|
from alltypestiny
|
|
group by int_col, string_col
|
|
---- RESULTS
|
|
0,0,0,1,'1',1,4
|
|
0,0,0,0,'0',1,4
|
|
---- TYPES
|
|
BIGINT, TINYINT, TINYINT, INT, STRING, BIGINT, BIGINT
|
|
====
|