Files
impala/testdata/workloads/functional-query/queries/QueryTest/top-n.test
Matthew Jacobs 65353fd9fb IMPALA-598: Order by behavior for NULLs should be revisited
This change modifies that behavior of NULL ordering such that nulls always
compare greater than other values, but "nulls first" or "nulls last" can be used
to explicitly specify if nulls should be sorted first or last regardless of the
asc/desc.

Change-Id: I92feda1e7f42249de4009afd39f8395a0a32a2f8
Reviewed-on: http://gerrit.ent.cloudera.com:8080/812
Reviewed-by: Marcel Kornacker <marcel@cloudera.com>
Reviewed-by: Matthew Jacobs <mj@cloudera.com>
Tested-by: Matthew Jacobs <mj@cloudera.com>
2014-01-08 10:53:48 -08:00

804 lines
12 KiB
Plaintext

====
---- QUERY
# Based on Aggregation Queries
select int_col, sum(float_col)
from functional_hbase.alltypessmall
where id < 5
group by 1
order by 2
limit 3
---- RESULTS
0,0
1,1.100000023841858
2,2.200000047683716
---- TYPES
INT, DOUBLE
====
---- QUERY
# Run query without order by
select tinyint_col, count(*)
from alltypesagg
group by 1
limit 10
---- RESULTS
5,1000
8,1000
7,1000
2,1000
NULL,1000
1,1000
4,1000
9,1000
6,1000
3,1000
---- TYPES
TINYINT, BIGINT
====
---- QUERY
# Same query order by asc first col
select tinyint_col, count(*)
from alltypesagg
group by 1
order by 1
limit 10
---- RESULTS
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,1000
---- TYPES
TINYINT, BIGINT
====
---- QUERY
# Same query order by asc first col, NULL should be last
# because it compares greater.
select tinyint_col, count(*)
from alltypesagg
group by 1
order by 1
limit 10
---- RESULTS
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,1000
---- TYPES
TINYINT, BIGINT
====
---- QUERY
# Same query order by asc first col, NULL should be first
select tinyint_col, count(*)
from alltypesagg
group by 1
order by 1 nulls first
limit 10
---- RESULTS
NULL,1000
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
---- TYPES
TINYINT, BIGINT
====
---- QUERY
# Same query but first col desc.
# NULL should be first because it compares greater
select tinyint_col, count(*)
from alltypesagg
group by 1
order by 1 desc
limit 20
---- RESULTS
NULL,1000
9,1000
8,1000
7,1000
6,1000
5,1000
4,1000
3,1000
2,1000
1,1000
---- TYPES
TINYINT, BIGINT
====
---- QUERY
# Same query as above but NULL should be last
select tinyint_col, count(*)
from alltypesagg
group by 1
order by 1 desc nulls last
limit 20
---- RESULTS
9,1000
8,1000
7,1000
6,1000
5,1000
4,1000
3,1000
2,1000
1,1000
NULL,1000
---- TYPES
TINYINT, BIGINT
====
---- QUERY
select date_string_col,int_col
from alltypesagg
order by date_string_col, int_col desc
limit 10
---- RESULTS
'01/01/10',NULL
'01/01/10',999
'01/01/10',998
'01/01/10',997
'01/01/10',996
'01/01/10',995
'01/01/10',994
'01/01/10',993
'01/01/10',992
'01/01/10',991
---- TYPES
STRING, INT
====
---- QUERY
# order by with null tuples in tuple row
select j.*, d.* from JoinTbl j full outer join DimTbl d
on (j.test_id = d.id)
order by j.test_id, j.test_name, j.test_zip, j.alltypes_id, d.name
limit 100
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
1003,'Name3',94611,5000,1003,'Name3',94612
1004,'Name4',94611,5000,1004,'Name4',94612
1005,'Name5',94611,5000,1005,'Name5',94613
1006,'Name16',94612,5000,1006,'Name6',94613
1006,'Name16',94612,15000,1006,'Name6',94613
1006,'Name16',94616,5000,1006,'Name6',94613
1006,'Name16',94616,15000,1006,'Name6',94613
1006,'Name6',94616,5000,1006,'Name6',94613
1006,'Name6',94616,15000,1006,'Name6',94613
1106,'Name16',94612,5000,NULL,'NULL',NULL
1106,'Name16',94612,15000,NULL,'NULL',NULL
1106,'Name16',94616,5000,NULL,'NULL',NULL
1106,'Name16',94616,15000,NULL,'NULL',NULL
1106,'Name6',94612,5000,NULL,'NULL',NULL
1106,'Name6',94612,15000,NULL,'NULL',NULL
1106,'Name6',94616,5000,NULL,'NULL',NULL
1106,'Name6',94616,15000,NULL,'NULL',NULL
NULL,'NULL',NULL,NULL,1010,'Name10',94615
NULL,'NULL',NULL,NULL,1007,'Name7',94614
NULL,'NULL',NULL,NULL,1008,'Name8',94614
NULL,'NULL',NULL,NULL,1009,'Name9',94615
---- TYPES
BIGINT, STRING, INT, INT, BIGINT, STRING, INT
====
---- QUERY
# order by multiple cols with nulls
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
group by 1, 2
order by 1, 2
limit 20
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
---- TYPES
TINYINT, SMALLINT, BIGINT
====
---- QUERY
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
group by 1, 2
order by 1, 2 desc
limit 20
---- RESULTS
0,2,90
0,1,90
0,0,120
1,2,90
1,1,120
1,0,90
2,2,120
2,1,90
2,0,90
NULL,NULL,10
NULL,2,30
NULL,1,30
NULL,0,30
---- TYPES
TINYINT, SMALLINT, BIGINT
====
---- QUERY
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
group by 1, 2
order by 1 desc, 2
limit 20
---- RESULTS
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
2,0,90
2,1,90
2,2,120
1,0,90
1,1,120
1,2,90
0,0,120
0,1,90
0,2,90
---- TYPES
TINYINT, SMALLINT, BIGINT
====
---- QUERY
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
group by 1, 2
order by 1 desc, 2 desc
limit 20
---- RESULTS
NULL,NULL,10
NULL,2,30
NULL,1,30
NULL,0,30
2,2,120
2,1,90
2,0,90
1,2,90
1,1,120
1,0,90
0,2,90
0,1,90
0,0,120
---- TYPES
TINYINT, SMALLINT, BIGINT
====
---- QUERY
# Multiple ordering columns with asc/desc and nulls first/last
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
group by 1, 2
order by 2 desc nulls last, 1 asc nulls first
limit 20
---- RESULTS
NULL,2,30
0,2,90
1,2,90
2,2,120
NULL,1,30
0,1,90
1,1,120
2,1,90
NULL,0,30
0,0,120
1,0,90
2,0,90
NULL,NULL,10
---- TYPES
TINYINT, SMALLINT, BIGINT
====
---- QUERY
select date_string_col
from alltypessmall
order by date_string_col desc
limit 50
---- RESULTS
'04/03/09'
'04/03/09'
'04/03/09'
'04/03/09'
'04/03/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'03/03/09'
'03/03/09'
'03/03/09'
'03/03/09'
'03/03/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
---- TYPES
STRING
====
---- QUERY
# Based on join queries
select a.tinyint_col, b.id, a.string_col
from alltypesagg a join alltypessmall b on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
order by a.string_col
limit 5
---- RESULTS
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'91'
1,1,'911'
---- TYPES
TINYINT, INT, STRING
====
---- QUERY
select a.tinyint_col, b.id, a.string_col
from alltypesagg a join alltypessmall b on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
order by a.string_col desc
limit 5
---- RESULTS
1,1,'991'
1,1,'981'
1,1,'971'
1,1,'961'
1,1,'951'
---- TYPES
TINYINT, INT, STRING
====
---- QUERY
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
from alltypesagg a
join alltypessmall b on (a.smallint_col = b.id)
join alltypessmall c on (a.tinyint_col = c.id)
where a.month=1
and a.day=1
and a.int_col > 899
and b.float_col > 4.5
and c.string_col < '7'
and a.int_col + b.float_col + cast(c.string_col as float) < 1000
order by c.string_col desc, a.smallint_col
limit 10
---- RESULTS
6,6,6,6,906,6.599999904632568,'6'
16,16,6,6,916,6.599999904632568,'6'
56,56,6,6,956,6.599999904632568,'6'
66,66,6,6,966,6.599999904632568,'6'
5,5,5,5,905,5.5,'5'
15,15,5,5,915,5.5,'5'
55,55,5,5,955,5.5,'5'
65,65,5,5,965,5.5,'5'
34,34,4,4,934,9.899999618530273,'4'
44,44,4,4,944,9.899999618530273,'4'
---- TYPES
SMALLINT, INT, TINYINT, INT, INT, FLOAT, STRING
====
---- QUERY
# Order by a column that is not in the select list
# Query with ordering column in select list
# Don't include date_string_col, it comes back in random order.
select int_col, tinyint_col
from alltypessmall
order by int_col desc
limit 20
---- RESULTS
9,9
9,9
9,9
9,9
9,9
9,9
9,9
9,9
8,8
8,8
8,8
8,8
8,8
8,8
8,8
8,8
7,7
7,7
7,7
7,7
---- TYPES
INT, TINYINT
====
---- QUERY
# Same query with ordering col not in select list
select tinyint_col
from alltypessmall
order by int_col desc
limit 20
---- RESULTS
9
9
9
9
9
9
9
9
8
8
8
8
8
8
8
8
7
7
7
7
---- TYPES
TINYINT
====
---- QUERY
# Order by many exprs
select year, month, count(*)
from alltypes
group by 1, 2
order by 1, 2
limit 100
---- RESULTS
2009,1,310
2009,2,280
2009,3,310
2009,4,300
2009,5,310
2009,6,300
2009,7,310
2009,8,310
2009,9,300
2009,10,310
2009,11,300
2009,12,310
2010,1,310
2010,2,280
2010,3,310
2010,4,300
2010,5,310
2010,6,300
2010,7,310
2010,8,310
2010,9,300
2010,10,310
2010,11,300
2010,12,310
---- TYPES
INT, INT, BIGINT
====
---- QUERY
# More Complex Ordering Exprs
select int_col % 7, count(*), avg(tinyint_col)
from alltypesagg
group by 1
order by avg(tinyint_col)
limit 10
---- RESULTS
4,1430,4.984496124031008
6,1420,4.9921875
1,1430,4.992248062015504
3,1430,5
5,1430,5.007751937984496
0,1420,5.0078125
2,1430,5.015503875968992
NULL,10,NULL
---- TYPES
INT, BIGINT, DOUBLE
====
---- QUERY
select int_col % 7, count(*), max(int_col)
from alltypesagg
group by 1
order by max(int_col)
limit 10
---- RESULTS
6,1420,993
0,1420,994
1,1430,995
2,1430,996
3,1430,997
4,1430,998
5,1430,999
NULL,10,NULL
---- TYPES
INT, BIGINT, INT
====
---- QUERY
select int_col % 5, count(*), avg(tinyint_col) - avg(float_col)
from alltypesagg
group by 1
order by avg(tinyint_col) - avg(float_col) desc
limit 10
---- RESULTS
NULL,10,NULL
1,2000,-544.8499889141322
2,2000,-544.9500045645237
0,1990,-545
3,2000,-545.0499953591824
4,2000,-545.1500110459327
---- TYPES
INT, BIGINT, DOUBLE
====
---- QUERY
select int_col
from alltypessmall
order by int_col % 5, int_col
limit 100
---- RESULTS
0
0
0
0
0
0
0
0
0
0
0
0
5
5
5
5
5
5
5
5
1
1
1
1
1
1
1
1
1
1
1
1
6
6
6
6
6
6
6
6
2
2
2
2
2
2
2
2
2
2
2
2
7
7
7
7
7
7
7
7
3
3
3
3
3
3
3
3
3
3
3
3
8
8
8
8
8
8
8
8
4
4
4
4
4
4
4
4
4
4
4
4
9
9
9
9
9
9
9
9
---- TYPES
INT
====
---- QUERY
# All select list items have an implicit alias. Test that the order by column ref
# "int_col" is correctly aliased to t1.int_col, and therefore it is not an
# ambiguous reference.
select t1.int_col from alltypessmall t1, alltypessmall t2 where t1.id = t2.id
order by int_col
limit 2
---- RESULTS
0
0
---- TYPES
INT
====
---- QUERY
select date_sub(timestamp_col, id), timestamp_col, id
from alltypessmall order by 1 limit 20
---- RESULTS
2008-12-10 00:24:00.960000000,2009-01-03 00:24:00.960000000,24
2008-12-11 00:23:00.930000000,2009-01-03 00:23:00.930000000,23
2008-12-12 00:22:00.910000000,2009-01-03 00:22:00.910000000,22
2008-12-13 00:21:00.900000000,2009-01-03 00:21:00.900000000,21
2008-12-14 00:19:00.810000000,2009-01-02 00:19:00.810000000,19
2008-12-14 00:20:00.900000000,2009-01-03 00:20:00.900000000,20
2008-12-15 00:18:00.730000000,2009-01-02 00:18:00.730000000,18
2008-12-16 00:17:00.660000000,2009-01-02 00:17:00.660000000,17
2008-12-16 00:24:00.960000000,2009-02-03 00:24:00.960000000,49
2008-12-17 00:16:00.600000000,2009-01-02 00:16:00.600000000,16
2008-12-17 00:23:00.930000000,2009-02-03 00:23:00.930000000,48
2008-12-18 00:15:00.550000000,2009-01-02 00:15:00.550000000,15
2008-12-18 00:22:00.910000000,2009-02-03 00:22:00.910000000,47
2008-12-19 00:14:00.510000000,2009-01-02 00:14:00.510000000,14
2008-12-19 00:21:00.900000000,2009-02-03 00:21:00.900000000,46
2008-12-19 00:24:00.960000000,2009-03-03 00:24:00.960000000,74
2008-12-20 00:13:00.480000000,2009-01-02 00:13:00.480000000,13
2008-12-20 00:19:00.810000000,2009-02-02 00:19:00.810000000,44
2008-12-20 00:20:00.900000000,2009-02-03 00:20:00.900000000,45
2008-12-20 00:23:00.930000000,2009-03-03 00:23:00.930000000,73
---- TYPES
TIMESTAMP, TIMESTAMP, INT
====
---- QUERY
# Test of order by with NULL tuple rows (from an outer join)
select t1.id, t1.int_col, t2.id, t2.int_col
from alltypesagg t1
left outer join alltypessmall t2
on (t1.int_col = t2.int_col)
order by t1.id,t2.id limit 10
---- TYPES
int,int,int,int
---- RESULTS
0,NULL,NULL,NULL
1,1,1,1
1,1,11,1
1,1,21,1
1,1,26,1
1,1,36,1
1,1,46,1
1,1,51,1
1,1,61,1
1,1,71,1
====
---- QUERY
# Test limit 0 from sub query
select sum(a.int_col) from
(select int_col from functional.alltypes order by int_col limit 0) a
---- TYPES
bigint
---- RESULTS
NULL
====
---- QUERY
# Test queries with divide by 0
select if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as v
from alltypestiny order by v desc limit 100;
---- TYPES
DOUBLE
---- RESULTS
inf
1
0
-1
-3
-inf
-inf
-nan
====
---- QUERY
# Test queries with divide by 0
select if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as v
from alltypestiny order by v asc limit 100;
---- TYPES
DOUBLE
---- RESULTS
-nan
-inf
-inf
-3
-1
0
1
inf
====