mirror of
https://github.com/apache/impala.git
synced 2025-12-30 12:02:10 -05:00
Test files in testdata/workloads/functional-query/queries/QueryTest
are parsed by test_file_parser.py, which used to ignore everything
before the first ==== line as a file header. This change fixes all
affected files.
This change also modifies the test file parser to forbid headers
starting with what looks like a subsection title ('----'), which
should prevent the reintroduction of similar errors in the future.
Change-Id: Iaa1bc5ffd02782e24289c7843dcb35401c334519
Reviewed-on: http://gerrit.cloudera.org:8080/2220
Reviewed-by: Lars Volker <lv@cloudera.com>
Tested-by: Internal Jenkins
373 lines
9.1 KiB
Plaintext
373 lines
9.1 KiB
Plaintext
====
|
|
---- QUERY
|
|
# count(distinct)
|
|
select count(distinct smallint_col, tinyint_col)
|
|
from alltypesagg
|
|
---- RESULTS
|
|
90
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(distinct bool_col)
|
|
from alltypesagg
|
|
---- RESULTS
|
|
2
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(distinct int_col)
|
|
from alltypesagg
|
|
---- RESULTS
|
|
999
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(distinct float_col)
|
|
from alltypesagg
|
|
---- RESULTS
|
|
999
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(distinct double_col)
|
|
from alltypesagg
|
|
---- RESULTS
|
|
999
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(distinct string_col)
|
|
from alltypesagg
|
|
---- RESULTS
|
|
1000
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(distinct date_string_col)
|
|
from alltypesagg
|
|
---- RESULTS
|
|
10
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(distinct timestamp_col)
|
|
from alltypesagg
|
|
---- RESULTS
|
|
10000
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# count(distinct) w/ grouping and non-distinct count()
|
|
select tinyint_col, count(distinct smallint_col), count(smallint_col)
|
|
from alltypesagg group by 1
|
|
---- RESULTS
|
|
2,10,1000
|
|
3,10,1000
|
|
NULL,9,1800
|
|
4,10,1000
|
|
8,10,1000
|
|
5,10,1000
|
|
9,10,1000
|
|
7,10,1000
|
|
1,10,1000
|
|
6,10,1000
|
|
---- TYPES
|
|
tinyint, bigint, bigint
|
|
====
|
|
---- QUERY
|
|
# count(distinct) w/ grouping and non-distinct count()
|
|
select tinyint_col, count(distinct int_col, smallint_col), count(smallint_col)
|
|
from alltypesagg group by 1
|
|
---- RESULTS
|
|
2,100,1000
|
|
3,100,1000
|
|
NULL,90,1800
|
|
4,100,1000
|
|
8,100,1000
|
|
5,100,1000
|
|
9,100,1000
|
|
7,100,1000
|
|
1,100,1000
|
|
6,100,1000
|
|
---- TYPES
|
|
tinyint, bigint, bigint
|
|
====
|
|
---- QUERY
|
|
# count(distinct) and sum(distinct) w/ grouping and non-distinct count()
|
|
select tinyint_col, count(distinct smallint_col), sum(distinct smallint_col),
|
|
count(smallint_col)
|
|
from alltypesagg group by 1
|
|
---- RESULTS
|
|
2,10,470,1000
|
|
3,10,480,1000
|
|
NULL,9,450,1800
|
|
4,10,490,1000
|
|
8,10,530,1000
|
|
5,10,500,1000
|
|
9,10,540,1000
|
|
7,10,520,1000
|
|
1,10,460,1000
|
|
6,10,510,1000
|
|
---- TYPES
|
|
tinyint, bigint, bigint, bigint
|
|
====
|
|
---- QUERY
|
|
# count(distinct) and sum(distinct) w/ grouping; distinct in min() and max()
|
|
# ignored
|
|
select tinyint_col, count(distinct smallint_col), sum(distinct smallint_col),
|
|
count(smallint_col), min(distinct int_col), max(distinct float_col)
|
|
from alltypesagg group by 1
|
|
---- RESULTS
|
|
2,10,470,1000,2,1091.199951171875
|
|
3,10,480,1000,3,1092.300048828125
|
|
NULL,9,450,1800,10,1089
|
|
4,10,490,1000,4,1093.400024414062
|
|
8,10,530,1000,8,1097.800048828125
|
|
5,10,500,1000,5,1094.5
|
|
9,10,540,1000,9,1098.900024414062
|
|
7,10,520,1000,7,1096.699951171875
|
|
1,10,460,1000,1,1090.099975585938
|
|
6,10,510,1000,6,1095.599975585938
|
|
---- TYPES
|
|
tinyint, bigint, bigint, bigint, int, float
|
|
====
|
|
---- QUERY
|
|
# count distinct order by the same agg expr
|
|
select count(distinct id) as count_id from alltypessmall order by count_id limit 100
|
|
---- RESULTS
|
|
100
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# count distinct order by a diff agg expr
|
|
select count(distinct id) as sum_id from alltypessmall order by max(distinct id)
|
|
limit 100
|
|
---- RESULTS
|
|
100
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# distinct *
|
|
select distinct *
|
|
from alltypesagg
|
|
where id < 20
|
|
---- RESULTS
|
|
6,true,6,6,6,60,6.599999904632568,60.59999999999999,'01/01/10','6',2010-01-01 00:06:00.150000000,2010,1,1
|
|
14,true,4,14,14,140,15.39999961853027,141.4,'01/01/10','14',2010-01-01 00:14:00.910000000,2010,1,1
|
|
18,true,8,18,18,180,19.79999923706055,181.8,'01/01/10','18',2010-01-01 00:18:01.530000000,2010,1,1
|
|
19,false,9,19,19,190,20.89999961853027,191.9,'01/01/10','19',2010-01-01 00:19:01.710000000,2010,1,1
|
|
1,false,1,1,1,10,1.100000023841858,10.1,'01/01/10','1',2010-01-01 00:01:00,2010,1,1
|
|
2,true,2,2,2,20,2.200000047683716,20.2,'01/01/10','2',2010-01-01 00:02:00.100000000,2010,1,1
|
|
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/10','0',2010-01-01 00:00:00,2010,1,1
|
|
7,false,7,7,7,70,7.699999809265137,70.7,'01/01/10','7',2010-01-01 00:07:00.210000000,2010,1,1
|
|
9,false,9,9,9,90,9.899999618530273,90.89999999999999,'01/01/10','9',2010-01-01 00:09:00.360000000,2010,1,1
|
|
11,false,1,11,11,110,12.10000038146973,111.1,'01/01/10','11',2010-01-01 00:11:00.550000000,2010,1,1
|
|
3,false,3,3,3,30,3.299999952316284,30.3,'01/01/10','3',2010-01-01 00:03:00.300000000,2010,1,1
|
|
15,false,5,15,15,150,16.5,151.5,'01/01/10','15',2010-01-01 00:15:01.500000000,2010,1,1
|
|
10,true,NULL,10,10,100,11,101,'01/01/10','10',2010-01-01 00:10:00.450000000,2010,1,1
|
|
17,false,7,17,17,170,18.70000076293945,171.7,'01/01/10','17',2010-01-01 00:17:01.360000000,2010,1,1
|
|
5,false,5,5,5,50,5.5,50.5,'01/01/10','5',2010-01-01 00:05:00.100000000,2010,1,1
|
|
10,true,NULL,10,10,100,11,101,'01/01/10','10',2010-01-01 00:10:00.450000000,2010,1,NULL
|
|
8,true,8,8,8,80,8.800000190734863,80.8,'01/01/10','8',2010-01-01 00:08:00.280000000,2010,1,1
|
|
13,false,3,13,13,130,14.30000019073486,131.3,'01/01/10','13',2010-01-01 00:13:00.780000000,2010,1,1
|
|
12,true,2,12,12,120,13.19999980926514,121.2,'01/01/10','12',2010-01-01 00:12:00.660000000,2010,1,1
|
|
16,true,6,16,16,160,17.60000038146973,161.6,'01/01/10','16',2010-01-01 00:16:01.200000000,2010,1,1
|
|
4,true,4,4,4,40,4.400000095367432,40.4,'01/01/10','4',2010-01-01 00:04:00.600000000,2010,1,1
|
|
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/10','0',2010-01-01 00:00:00,2010,1,NULL
|
|
---- TYPES
|
|
int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp, int, int, int
|
|
====
|
|
---- QUERY
|
|
# distinct w/ explicit select list
|
|
select distinct bool_col, tinyint_col
|
|
from alltypesagg
|
|
---- RESULTS
|
|
false,1
|
|
false,3
|
|
false,5
|
|
false,7
|
|
false,9
|
|
true,2
|
|
true,4
|
|
true,6
|
|
true,8
|
|
true,NULL
|
|
---- TYPES
|
|
boolean, tinyint
|
|
====
|
|
---- QUERY
|
|
# test count distinct with nulls w/o groupby
|
|
select count(distinct NULL) from alltypesagg
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# test count distinct with nulls w/o groupby
|
|
select count(distinct int_col, NULL) from alltypesagg
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# test count distinct with nulls and groupby
|
|
select tinyint_col, count(distinct NULL) from alltypesagg group by tinyint_col
|
|
order by tinyint_col limit 10
|
|
---- RESULTS
|
|
1,0
|
|
2,0
|
|
3,0
|
|
4,0
|
|
5,0
|
|
6,0
|
|
7,0
|
|
8,0
|
|
9,0
|
|
NULL,0
|
|
---- TYPES
|
|
tinyint, bigint
|
|
====
|
|
---- QUERY
|
|
select tinyint_col, count(distinct int_col, NULL) from alltypesagg group by tinyint_col
|
|
order by tinyint_col limit 10
|
|
---- RESULTS
|
|
1,0
|
|
2,0
|
|
3,0
|
|
4,0
|
|
5,0
|
|
6,0
|
|
7,0
|
|
8,0
|
|
9,0
|
|
NULL,0
|
|
---- TYPES
|
|
tinyint, bigint
|
|
====
|
|
---- QUERY
|
|
# make sure we still return the NULL group even though NULLs are not added to COUNT
|
|
select tinyint_col, count(distinct tinyint_col, NULL) from alltypesagg group by tinyint_col
|
|
order by tinyint_col limit 10
|
|
---- RESULTS
|
|
1,0
|
|
2,0
|
|
3,0
|
|
4,0
|
|
5,0
|
|
6,0
|
|
7,0
|
|
8,0
|
|
9,0
|
|
NULL,0
|
|
---- TYPES
|
|
tinyint, bigint
|
|
====
|
|
---- QUERY
|
|
# test join on inline views containing distinct aggregates to make sure
|
|
# the aggregation info reports the correct tuple ids (from the 2nd phase
|
|
# distinct aggregation) for the inline-view expression substitution
|
|
select t1.c, t2.c from
|
|
(select count(distinct int_col) as c from functional.alltypestiny) t1 inner join
|
|
(select count(distinct bigint_col) as c from functional.alltypestiny) t2 on (t1.c = t2.c)
|
|
---- RESULTS
|
|
2,2
|
|
---- TYPES
|
|
bigint, bigint
|
|
====
|
|
---- QUERY
|
|
# Test distinct + non-distinct aggregates without group by and having clause (IMPALA-845)
|
|
# alltypessmall has 100 non-null values for bigint_col, expect a single result
|
|
select count(distinct tinyint_col) from functional.alltypessmall
|
|
having count(bigint_col) > 99
|
|
---- RESULTS
|
|
10
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# Test distinct + non-distinct aggregates without group by and having clause (IMPALA-845)
|
|
# alltypessmall has 100 non-null values for bigint_col, expect an empty result
|
|
select count(distinct tinyint_col) from functional.alltypessmall
|
|
having count(bigint_col) > 100
|
|
---- RESULTS
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# Regression test for COUNT(ALL ) with no group-by and other distinct agg. IMPALA-862
|
|
select count(*), COUNT(distinct 1) from alltypesagg where false
|
|
---- RESULTS
|
|
0,0
|
|
---- TYPES
|
|
bigint, bigint
|
|
====
|
|
---- QUERY
|
|
# Regression test for COUNT(ALL ) with no group-by and other distinct agg. IMPALA-862
|
|
select count(tinyint_col), sum(distinct int_col) from alltypesagg
|
|
---- RESULTS
|
|
9000,499500
|
|
---- TYPES
|
|
bigint, bigint
|
|
====
|
|
---- QUERY
|
|
# Regression test for COUNT(ALL ) with no group-by and other distinct agg. IMPALA-862
|
|
select count(*), COUNT(distinct 1) from alltypesagg
|
|
---- RESULTS
|
|
11000,1
|
|
---- TYPES
|
|
bigint, bigint
|
|
====
|
|
---- QUERY
|
|
# Test distinct + non-distinct aggregation with intermediate tuples (IMPALA-1247).
|
|
select avg(int_col), count(int_col), avg(distinct bigint_col)
|
|
from functional.alltypes
|
|
---- RESULTS
|
|
4.5,7300,45
|
|
---- TYPES
|
|
double, bigint, double
|
|
====
|
|
---- QUERY
|
|
# Test distinct + non-distinct aggregation with intermediate tuples (IMPALA-1247).
|
|
select bool_col, avg(int_col), count(int_col), avg(distinct bigint_col)
|
|
from functional.alltypes
|
|
group by bool_col
|
|
order by bool_col
|
|
---- RESULTS
|
|
false,5,3650,50
|
|
true,4,3650,40
|
|
---- TYPES
|
|
boolean, double, bigint, double
|
|
====
|
|
---- QUERY
|
|
# Test rewriting count(distinct) into NDV() via a query option.
|
|
set appx_count_distinct=true;
|
|
select count(distinct int_col), count(distinct float_col), count(distinct string_col)
|
|
from alltypesagg
|
|
---- RESULTS
|
|
957,1016,963
|
|
---- TYPES
|
|
bigint, bigint, bigint
|
|
====
|
|
---- QUERY
|
|
# Large (more than 1 regular batches) distinct with multiple group by keys.
|
|
SELECT COUNT(*) FROM
|
|
(SELECT COUNT(DISTINCT p_partkey)
|
|
FROM tpch_parquet.part GROUP BY p_brand, p_type, p_size) v
|
|
---- RESULTS
|
|
123039
|
|
---- TYPES
|
|
bigint
|
|
====
|