Files
impala/testdata/workloads/functional-query/queries/QueryTest/grouping-sets.test
Tim Armstrong 574fef2a76 IMPALA-9917: grouping() and grouping_id() support
Implements grouping() and grouping_id() builtins.
grouping_id() has both a no-arg version, which returns
a bit vector of all grouping exprs and a varargs version,
which returns a bit vector of the provided arguments.

Grouping is a keyword, so needs special handling in the
parser to be accepted as a function name.

These functions are implemented in the transpose agg
with a CASE expression similar to other aggregate functions,
but returning the grouping() or grouping_id() value for that
aggregation class instead of an aggregated value.

Testing:
* Added parser test for grouping keyword.
* Added analysis tests for the functions.
* Added basic planner test to show expressions generated
* Added some TPC-DS queries that use grouping() - queries
  80, 70 and 86 using reference .test files from Fang-Yu
  Rao. 27 and 36 were added with reference results from
  https://github.com/cwida/tpcds-result-reproduction
* Add targeted end-to-end tests.
* Added view compatibility test with Hive.

Change-Id: If0b1640d606256c0fe9204d2a21a8f6d06abcdb6
Reviewed-on: http://gerrit.cloudera.org:8080/16140
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Tim Armstrong <tarmstrong@cloudera.com>
2020-07-14 03:13:18 +00:00

328 lines
6.8 KiB
Plaintext

====
---- QUERY
# Basic ROLLUP.
select int_col, string_col, bool_col, count(*)
from alltypesagg
group by rollup(1, 2, 3)
order by count(*) desc, 1, 2, 3
limit 20
---- RESULTS
NULL,'NULL',NULL,11000
10,'10',true,20
10,'10',NULL,20
10,'NULL',NULL,20
20,'20',true,20
20,'20',NULL,20
20,'NULL',NULL,20
30,'30',true,20
30,'30',NULL,20
30,'NULL',NULL,20
40,'40',true,20
40,'40',NULL,20
40,'NULL',NULL,20
50,'50',true,20
50,'50',NULL,20
50,'NULL',NULL,20
60,'60',true,20
60,'60',NULL,20
60,'NULL',NULL,20
70,'70',true,20
---- TYPES
INT,STRING,BOOLEAN,BIGINT
====
---- QUERY
# Basic CUBE
select int_col, string_col, bool_col, count(*)
from alltypesagg
group by cube(1, 2, 3)
order by count(*) desc, 1, 2, 3
limit 20
---- RESULTS
NULL,'NULL',NULL,11000
NULL,'NULL',true,6000
NULL,'NULL',false,5000
10,'10',true,20
10,'10',NULL,20
10,'NULL',true,20
10,'NULL',NULL,20
20,'20',true,20
20,'20',NULL,20
20,'NULL',true,20
20,'NULL',NULL,20
30,'30',true,20
30,'30',NULL,20
30,'NULL',true,20
30,'NULL',NULL,20
40,'40',true,20
40,'40',NULL,20
40,'NULL',true,20
40,'NULL',NULL,20
50,'50',true,20
---- TYPES
INT,STRING,BOOLEAN,BIGINT
====
---- QUERY
# Basic GROUPING SETS
select int_col, string_col, bool_col, count(*)
from alltypesagg
group by grouping sets((int_col), (string_col), (int_col, bool_col))
order by count(*) desc, 1, 2, 3
limit 20
---- RESULTS
10,'NULL',true,20
10,'NULL',NULL,20
20,'NULL',true,20
20,'NULL',NULL,20
30,'NULL',true,20
30,'NULL',NULL,20
40,'NULL',true,20
40,'NULL',NULL,20
50,'NULL',true,20
50,'NULL',NULL,20
60,'NULL',true,20
60,'NULL',NULL,20
70,'NULL',true,20
70,'NULL',NULL,20
80,'NULL',true,20
80,'NULL',NULL,20
90,'NULL',true,20
90,'NULL',NULL,20
100,'NULL',true,20
100,'NULL',NULL,20
---- TYPES
INT,STRING,BOOLEAN,BIGINT
====
---- QUERY
# Output should contain a row for both the NULL value in
# int_col and the rollup of all values in int_col.
select year, int_col, count(*)
from alltypesagg
group by rollup(year, int_col)
order by 2 desc, 1, 3
limit 10
---- RESULTS
2010,NULL,20
2010,NULL,11000
NULL,NULL,11000
2010,999,10
2010,998,10
2010,997,10
2010,996,10
2010,995,10
2010,994,10
2010,993,10
---- TYPES
INT, INT, BIGINT
====
---- QUERY
# Output should contain a row for both the NULL value in
# int_col and the rollup of all values in int_col.
select year, int_col, count(*)
from alltypesagg
group by rollup(year, int_col)
having int_col is NULL
---- RESULTS
NULL,NULL,11000
2010,NULL,20
2010,NULL,11000
---- TYPES
INT, INT, BIGINT
====
---- QUERY
# Correlated subquery with rollup. Results are the same as
# if rollup was not present.
select id
from functional.alltypesagg a
where exists
(select id
from functional.alltypestiny b
where a.tinyint_col = b.tinyint_col and a.string_col = b.string_col
group by rollup(id, int_col, bool_col))
and tinyint_col < 10
---- RESULTS
1
1001
2001
3001
4001
5001
6001
7001
8001
9001
---- TYPES
INT
====
---- QUERY
# Test grouping() and grouping_id() on a table with null values.
select
grouping_id(),
grouping(int_col % 2),
grouping(substring(string_col, 1, 1)),
grouping(bool_col),
int_col % 2 i,
substring(string_col, 1, 1) str,
bool_col,
count(*)
from alltypesagg
group by cube(i, str, bool_col)
order by grouping_id() desc, i, str, bool_col
---- RESULTS
7,1,1,1,NULL,'NULL',NULL,11000
6,1,1,0,NULL,'NULL',false,5000
6,1,1,0,NULL,'NULL',true,6000
5,1,0,1,NULL,'0',NULL,20
5,1,0,1,NULL,'1',NULL,1220
5,1,0,1,NULL,'2',NULL,1220
5,1,0,1,NULL,'3',NULL,1220
5,1,0,1,NULL,'4',NULL,1220
5,1,0,1,NULL,'5',NULL,1220
5,1,0,1,NULL,'6',NULL,1220
5,1,0,1,NULL,'7',NULL,1220
5,1,0,1,NULL,'8',NULL,1220
5,1,0,1,NULL,'9',NULL,1220
4,1,0,0,NULL,'0',true,20
4,1,0,0,NULL,'1',false,560
4,1,0,0,NULL,'1',true,660
4,1,0,0,NULL,'2',false,550
4,1,0,0,NULL,'2',true,670
4,1,0,0,NULL,'3',false,560
4,1,0,0,NULL,'3',true,660
4,1,0,0,NULL,'4',false,550
4,1,0,0,NULL,'4',true,670
4,1,0,0,NULL,'5',false,560
4,1,0,0,NULL,'5',true,660
4,1,0,0,NULL,'6',false,550
4,1,0,0,NULL,'6',true,670
4,1,0,0,NULL,'7',false,560
4,1,0,0,NULL,'7',true,660
4,1,0,0,NULL,'8',false,550
4,1,0,0,NULL,'8',true,670
4,1,0,0,NULL,'9',false,560
4,1,0,0,NULL,'9',true,660
3,0,1,1,0,'NULL',NULL,5980
3,0,1,1,1,'NULL',NULL,5000
3,0,1,1,NULL,'NULL',NULL,20
2,0,1,0,0,'NULL',true,5980
2,0,1,0,1,'NULL',false,5000
2,0,1,0,NULL,'NULL',true,20
1,0,0,1,0,'1',NULL,660
1,0,0,1,0,'2',NULL,670
1,0,0,1,0,'3',NULL,660
1,0,0,1,0,'4',NULL,670
1,0,0,1,0,'5',NULL,660
1,0,0,1,0,'6',NULL,670
1,0,0,1,0,'7',NULL,660
1,0,0,1,0,'8',NULL,670
1,0,0,1,0,'9',NULL,660
1,0,0,1,1,'1',NULL,560
1,0,0,1,1,'2',NULL,550
1,0,0,1,1,'3',NULL,560
1,0,0,1,1,'4',NULL,550
1,0,0,1,1,'5',NULL,560
1,0,0,1,1,'6',NULL,550
1,0,0,1,1,'7',NULL,560
1,0,0,1,1,'8',NULL,550
1,0,0,1,1,'9',NULL,560
1,0,0,1,NULL,'0',NULL,20
0,0,0,0,0,'1',true,660
0,0,0,0,0,'2',true,670
0,0,0,0,0,'3',true,660
0,0,0,0,0,'4',true,670
0,0,0,0,0,'5',true,660
0,0,0,0,0,'6',true,670
0,0,0,0,0,'7',true,660
0,0,0,0,0,'8',true,670
0,0,0,0,0,'9',true,660
0,0,0,0,1,'1',false,560
0,0,0,0,1,'2',false,550
0,0,0,0,1,'3',false,560
0,0,0,0,1,'4',false,550
0,0,0,0,1,'5',false,560
0,0,0,0,1,'6',false,550
0,0,0,0,1,'7',false,560
0,0,0,0,1,'8',false,550
0,0,0,0,1,'9',false,560
0,0,0,0,NULL,'0',true,20
---- TYPES
BIGINT, TINYINT, TINYINT, TINYINT, INT, STRING, BOOLEAN, BIGINT
====
---- QUERY
# Test grouping_id() with different argument list order and length
select
grouping_id(bool_col, string_col, int_col),
grouping_id(bool_col),
int_col,
string_col,
bool_col,
count(*)
from alltypestiny
group by rollup(int_col, string_col, bool_col)
---- RESULTS
6,1,0,'NULL',NULL,4
0,0,1,'1',false,4
7,1,NULL,'NULL',NULL,8
4,1,0,'0',NULL,4
4,1,1,'1',NULL,4
0,0,0,'0',true,4
6,1,1,'NULL',NULL,4
---- TYPES
BIGINT, BIGINT, INT, STRING, BOOLEAN, BIGINT
====
---- QUERY
# Test grouping() and grouping_id() with a single grouping set.
select
grouping_id(int_col, string_col),
grouping(int_col),
grouping(string_col),
int_col,
string_col,
count(*)
from alltypestiny
group by grouping sets((int_col, string_col))
---- RESULTS
0,0,0,1,'1',4
0,0,0,0,'0',4
---- TYPES
BIGINT, TINYINT, TINYINT, INT, STRING, BIGINT
====
---- QUERY
# Test grouping() and grouping_id() with plain GROUP BY, where they always
# return 0.
select
grouping_id(int_col, string_col),
grouping_id(),
grouping(int_col),
grouping(string_col),
int_col,
string_col,
count(*)
from alltypestiny
group by int_col, string_col
---- RESULTS
0,0,0,0,1,'1',4
0,0,0,0,0,'0',4
---- TYPES
BIGINT, BIGINT, TINYINT, TINYINT, INT, STRING, BIGINT
====
---- QUERY
# Test grouping() and grouping_id() with plain GROUP BY and multiple aggregation
# classes from the distinct aggregate functions. They always return 0 in this
# case.
select
grouping_id(int_col, string_col),
grouping(int_col),
grouping(string_col),
int_col,
string_col,
count(distinct tinyint_col),
count(distinct timestamp_col)
from alltypestiny
group by int_col, string_col
---- RESULTS
0,0,0,1,'1',1,4
0,0,0,0,'0',1,4
---- TYPES
BIGINT, TINYINT, TINYINT, INT, STRING, BIGINT, BIGINT
====