mirror of
https://github.com/apache/impala.git
synced 2026-01-10 18:00:14 -05:00
In EE tests HS2 returned results with smaller precision than Beeswax for
FLOAT/DOUBLE/TIMESTAMP types. These differences are not inherent to the
HS2 protocol - the results are returned with full precision in Thrift
and lose precision during conversion in client code.
This patch changes to conversion in HS2 to match Beeswax and removes
test section DBAPI_RESULTS that was used to handle the differences:
- float/double: print method is changed from str() to ":.16".format()
- timestamp: impyla's cursor is created with convert_types=False to
avoid conversion to datetime.datetime (which has only
microsec precision)
Note that FLOAT/DOUBLE are still different in impala-shell, this change
only deals with EE tests.
Testing:
- ran the changed tests
Change-Id: If69ae90c6333ff245c2b951af5689e3071f85cb2
Reviewed-on: http://gerrit.cloudera.org:8080/17325
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
581 lines
15 KiB
Plaintext
581 lines
15 KiB
Plaintext
====
|
|
---- QUERY
|
|
# join between three tables, extra join predicates, extra scan predicates, nulls in joins
|
|
# cols
|
|
# (alltypesagg.tinyint_col contains nulls instead of 0s)
|
|
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
|
|
from (
|
|
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
|
|
a.int_col int_col, a.month month,
|
|
b.float_col float_col, b.id id
|
|
from (
|
|
select *
|
|
from alltypesagg a
|
|
where month=1
|
|
) a
|
|
join alltypessmall b
|
|
on (a.smallint_col = b.id)
|
|
) x
|
|
join alltypessmall c on (x.tinyint_col = c.id)
|
|
where x.day=1
|
|
and x.int_col > 899
|
|
and x.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and x.int_col + x.float_col + cast(c.string_col as float) < 1000
|
|
---- RESULTS
|
|
15,15,5,5,915,5.5,'5'
|
|
16,16,6,6,916,6.599999904632568,'6'
|
|
31,31,1,1,931,6.599999904632568,'1'
|
|
32,32,2,2,932,7.699999809265137,'2'
|
|
33,33,3,3,933,8.800000190734863,'3'
|
|
34,34,4,4,934,9.899999618530273,'4'
|
|
41,41,1,1,941,6.599999904632568,'1'
|
|
42,42,2,2,942,7.699999809265137,'2'
|
|
43,43,3,3,943,8.800000190734863,'3'
|
|
44,44,4,4,944,9.899999618530273,'4'
|
|
5,5,5,5,905,5.5,'5'
|
|
55,55,5,5,955,5.5,'5'
|
|
56,56,6,6,956,6.599999904632568,'6'
|
|
6,6,6,6,906,6.599999904632568,'6'
|
|
65,65,5,5,965,5.5,'5'
|
|
66,66,6,6,966,6.599999904632568,'6'
|
|
81,81,1,1,981,6.599999904632568,'1'
|
|
82,82,2,2,982,7.699999809265137,'2'
|
|
83,83,3,3,983,8.800000190734863,'3'
|
|
84,84,4,4,984,9.899999618530273,'4'
|
|
91,91,1,1,991,6.599999904632568,'1'
|
|
---- TYPES
|
|
smallint, int, tinyint, int, int, float, string
|
|
====
|
|
---- QUERY
|
|
# Same join as above, but subquery on the RHS
|
|
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
|
|
from alltypessmall c
|
|
join
|
|
(
|
|
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
|
|
a.int_col int_col, a.month month,
|
|
b.float_col float_col, b.id id
|
|
from alltypessmall b
|
|
join
|
|
(
|
|
select *
|
|
from alltypesagg a
|
|
where month=1
|
|
) a
|
|
on (a.smallint_col = b.id)
|
|
) x
|
|
on (x.tinyint_col = c.id)
|
|
where x.day=1
|
|
and x.int_col > 899
|
|
and x.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and x.int_col + x.float_col + cast(c.string_col as float) < 1000
|
|
---- RESULTS
|
|
15,15,5,5,915,5.5,'5'
|
|
16,16,6,6,916,6.599999904632568,'6'
|
|
31,31,1,1,931,6.599999904632568,'1'
|
|
32,32,2,2,932,7.699999809265137,'2'
|
|
33,33,3,3,933,8.800000190734863,'3'
|
|
34,34,4,4,934,9.899999618530273,'4'
|
|
41,41,1,1,941,6.599999904632568,'1'
|
|
42,42,2,2,942,7.699999809265137,'2'
|
|
43,43,3,3,943,8.800000190734863,'3'
|
|
44,44,4,4,944,9.899999618530273,'4'
|
|
5,5,5,5,905,5.5,'5'
|
|
55,55,5,5,955,5.5,'5'
|
|
56,56,6,6,956,6.599999904632568,'6'
|
|
6,6,6,6,906,6.599999904632568,'6'
|
|
65,65,5,5,965,5.5,'5'
|
|
66,66,6,6,966,6.599999904632568,'6'
|
|
81,81,1,1,981,6.599999904632568,'1'
|
|
82,82,2,2,982,7.699999809265137,'2'
|
|
83,83,3,3,983,8.800000190734863,'3'
|
|
84,84,4,4,984,9.899999618530273,'4'
|
|
91,91,1,1,991,6.599999904632568,'1'
|
|
---- TYPES
|
|
smallint, int, tinyint, int, int, float, string
|
|
====
|
|
---- QUERY
|
|
# aggregate without group by
|
|
select *
|
|
from (
|
|
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col),
|
|
sum(tinyint_col), avg(tinyint_col)
|
|
from (
|
|
select * from alltypesagg
|
|
) a
|
|
) b
|
|
---- RESULTS
|
|
11000,9000,1,9,45000,5
|
|
---- TYPES
|
|
bigint, bigint, tinyint, tinyint, bigint, double
|
|
====
|
|
---- QUERY
|
|
# aggregate with group-by, having
|
|
select *
|
|
from (
|
|
select int_col % 7 c1, count(*) c2, avg(int_col) c3
|
|
from (
|
|
select * from alltypesagg where day is not null
|
|
) a
|
|
group by 1
|
|
having avg(int_col) > 500 or count(*) = 10
|
|
) b
|
|
where c1 is not null
|
|
and c2 > 10
|
|
---- RESULTS
|
|
4,1430,501
|
|
5,1430,502
|
|
0,1420,500.5
|
|
---- TYPES
|
|
int, bigint, double
|
|
====
|
|
---- QUERY
|
|
# subquery with aggregation and order by/limit, as left-hand side of join;
|
|
# having clause in subquery is transfered to merge agg step in distrib plan
|
|
select *
|
|
from (
|
|
select int_col, count(*)
|
|
from alltypessmall
|
|
where month = 1
|
|
group by int_col
|
|
having count(*) > 2
|
|
order by count(*) desc, int_col limit 5
|
|
) t1
|
|
join alltypes t2 on (t1.int_col = t2.id)
|
|
where month = 1
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0,3,0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1
|
|
1,3,1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1
|
|
2,3,2,true,2,2,2,20,2.200000047683716,20.2,'01/01/09','2',2009-01-01 00:02:00.100000000,2009,1
|
|
3,3,3,false,3,3,3,30,3.299999952316284,30.3,'01/01/09','3',2009-01-01 00:03:00.300000000,2009,1
|
|
4,3,4,true,4,4,4,40,4.400000095367432,40.4,'01/01/09','4',2009-01-01 00:04:00.600000000,2009,1
|
|
---- TYPES
|
|
int, bigint, int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp, int, int
|
|
====
|
|
---- QUERY
|
|
# subquery with aggregation and order by/limit, as left-hand side of join;
|
|
# having clause in subquery is transfered to merge agg step in distrib plan
|
|
# Disable the estimation of cardinality for an hdfs table withot stats.
|
|
set DISABLE_HDFS_NUM_ROWS_ESTIMATE=1;
|
|
select *
|
|
from (
|
|
select int_col, count(*)
|
|
from alltypessmall
|
|
where month = 1
|
|
group by int_col
|
|
having count(*) > 2
|
|
order by count(*) desc, int_col limit 5
|
|
) t1
|
|
join alltypes t2 on (t1.int_col = t2.id)
|
|
where month = 1
|
|
---- RESULTS
|
|
0,3,0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1
|
|
1,3,1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1
|
|
2,3,2,true,2,2,2,20,2.200000047683716,20.2,'01/01/09','2',2009-01-01 00:02:00.100000000,2009,1
|
|
3,3,3,false,3,3,3,30,3.299999952316284,30.3,'01/01/09','3',2009-01-01 00:03:00.300000000,2009,1
|
|
4,3,4,true,4,4,4,40,4.400000095367432,40.4,'01/01/09','4',2009-01-01 00:04:00.600000000,2009,1
|
|
---- TYPES
|
|
int, bigint, int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp, int, int
|
|
====
|
|
---- QUERY
|
|
select distinct *
|
|
from (
|
|
select bool_col, tinyint_col, count(*)
|
|
from alltypesagg
|
|
group by bool_col, tinyint_col
|
|
having bool_col = true
|
|
) x
|
|
where tinyint_col < 6
|
|
---- RESULTS
|
|
true,2,1000
|
|
true,4,1000
|
|
---- TYPES
|
|
boolean, tinyint, bigint
|
|
====
|
|
---- QUERY
|
|
# distinct w/ explicit select list
|
|
select *
|
|
from (
|
|
select distinct bool_col, tinyint_col
|
|
from (
|
|
select * from alltypesagg where tinyint_col < 7
|
|
) y
|
|
) x
|
|
where bool_col = true
|
|
---- RESULTS
|
|
true,2
|
|
true,4
|
|
true,6
|
|
---- TYPES
|
|
boolean, tinyint
|
|
====
|
|
---- QUERY
|
|
# semi-join on string
|
|
select *
|
|
from (
|
|
select d.*
|
|
from DimTbl d left semi join JoinTbl j on (j.test_name = d.name)
|
|
) x
|
|
where x.name > 'Name1'
|
|
---- RESULTS
|
|
1002,'Name2',94611
|
|
1003,'Name3',94612
|
|
1004,'Name4',94612
|
|
1005,'Name5',94613
|
|
1006,'Name6',94613
|
|
---- TYPES
|
|
bigint, string, int
|
|
====
|
|
---- QUERY
|
|
select j.*, d.*
|
|
from (
|
|
select *
|
|
from JoinTbl a
|
|
) j
|
|
left outer join
|
|
(
|
|
select *
|
|
from DimTbl b
|
|
) d
|
|
on (j.test_name = d.name)
|
|
where j.test_id <= 1006
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000,1001,'Name1',94611
|
|
1002,'Name2',94611,5000,1002,'Name2',94611
|
|
1003,'Name3',94611,5000,1003,'Name3',94612
|
|
1004,'Name4',94611,5000,1004,'Name4',94612
|
|
1005,'Name5',94611,5000,1005,'Name5',94613
|
|
1006,'Name16',94612,15000,NULL,'NULL',NULL
|
|
1006,'Name16',94612,5000,NULL,'NULL',NULL
|
|
1006,'Name16',94616,15000,NULL,'NULL',NULL
|
|
1006,'Name16',94616,5000,NULL,'NULL',NULL
|
|
1006,'Name6',94616,15000,1006,'Name6',94613
|
|
1006,'Name6',94616,5000,1006,'Name6',94613
|
|
---- TYPES
|
|
bigint, string, int, int, bigint, string, int
|
|
====
|
|
---- QUERY
|
|
# TODO: If we apply predicate on d, the result will be incorrect. This is a general
|
|
# predicate evaluation issue.
|
|
#
|
|
select j.*, d.*
|
|
from (
|
|
select *
|
|
from JoinTbl a
|
|
) j
|
|
left outer join
|
|
(
|
|
select *
|
|
from DimTbl b
|
|
) d
|
|
on (j.test_name = d.name)
|
|
where j.test_id <= 1006
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000,1001,'Name1',94611
|
|
1002,'Name2',94611,5000,1002,'Name2',94611
|
|
1003,'Name3',94611,5000,1003,'Name3',94612
|
|
1004,'Name4',94611,5000,1004,'Name4',94612
|
|
1005,'Name5',94611,5000,1005,'Name5',94613
|
|
1006,'Name16',94612,15000,NULL,'NULL',NULL
|
|
1006,'Name16',94612,5000,NULL,'NULL',NULL
|
|
1006,'Name16',94616,15000,NULL,'NULL',NULL
|
|
1006,'Name16',94616,5000,NULL,'NULL',NULL
|
|
1006,'Name6',94616,15000,1006,'Name6',94613
|
|
1006,'Name6',94616,5000,1006,'Name6',94613
|
|
---- TYPES
|
|
bigint, string, int, int, bigint, string, int
|
|
====
|
|
---- QUERY
|
|
# Constant selects in subqueries
|
|
select * from (select 1, 2) x
|
|
---- RESULTS
|
|
1,2
|
|
---- TYPES
|
|
tinyint, tinyint
|
|
====
|
|
---- QUERY
|
|
# Constant selects in subqueries
|
|
select * from (select y from (select 1 y) a where y < 10) b
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
tinyint
|
|
====
|
|
---- QUERY
|
|
# Constant selects in subqueries
|
|
select * from (select 1 union all select 2 union all select * from (select 3) y) x
|
|
---- RESULTS
|
|
1
|
|
2
|
|
3
|
|
---- TYPES
|
|
tinyint
|
|
====
|
|
---- QUERY
|
|
# Join on inline views made up of unions of constant selects
|
|
select * from
|
|
(select 1 a, 2 b union all select 1 a, 2 b) x
|
|
inner join
|
|
(select 1 a, 3 b union all select 1 a, 2 b) y on x.a = y.a
|
|
inner join
|
|
(select 1 a, 3 b union all select 1 a, 3 b) z on z.b = y.b
|
|
---- RESULTS
|
|
1,2,1,3,1,3
|
|
1,2,1,3,1,3
|
|
1,2,1,3,1,3
|
|
1,2,1,3,1,3
|
|
---- TYPES
|
|
tinyint, tinyint, tinyint, tinyint, tinyint, tinyint
|
|
====
|
|
---- QUERY
|
|
# Semi and inner join on a table and on inline views made up of constant selects
|
|
select x.date_string_col, z.* from alltypessmall x
|
|
left semi join
|
|
(select 1 a, 3 b union all select 1 a, 3 b) y on y.a = x.id
|
|
inner join
|
|
(select 1 a, 3 b union all select 1 a, 3 b) z on z.b = x.id + 2
|
|
---- RESULTS
|
|
'01/01/09',1,3
|
|
'01/01/09',1,3
|
|
---- TYPES
|
|
string, tinyint, tinyint
|
|
====
|
|
---- QUERY
|
|
# Values statement in subqueries
|
|
select * from (values(1, 2), (3, 4)) x
|
|
---- RESULTS
|
|
1,2
|
|
3,4
|
|
---- TYPES
|
|
TINYINT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Values statement in subqueries
|
|
select * from (select y from (values(1 as y), (11)) a where y < 10) b
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
TINYINT
|
|
====
|
|
---- QUERY
|
|
# Values statement in subqueries with union
|
|
select * from (values(1), (2) union all select * from (values(3)) y) x
|
|
---- RESULTS
|
|
1
|
|
2
|
|
3
|
|
---- TYPES
|
|
TINYINT
|
|
====
|
|
---- QUERY
|
|
# Join on inline views made up of values statements
|
|
select * from
|
|
(values(1 a, 2 b), (1, 2)) x
|
|
inner join
|
|
(values(1 a, 3 b), (1, 2)) y on x.a = y.a
|
|
inner join
|
|
(values(1 a, 3 b), (1, 3)) z on z.b = y.b
|
|
---- RESULTS
|
|
1,2,1,3,1,3
|
|
1,2,1,3,1,3
|
|
1,2,1,3,1,3
|
|
1,2,1,3,1,3
|
|
---- TYPES
|
|
TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Semi and inner join on a table and on inline views made up of values statements
|
|
select x.date_string_col, z.* from alltypessmall x
|
|
left semi join
|
|
(values(1 a, 3 b), (1, 3)) y on y.a = x.id
|
|
inner join
|
|
(values(1 a, 3 b), (1, 3)) z on z.b = x.id + 2
|
|
---- RESULTS
|
|
'01/01/09',1,3
|
|
'01/01/09',1,3
|
|
---- TYPES
|
|
STRING, TINYINT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Select constant with unreferenced aggregate in subquery
|
|
select 1 from (select count(*) from alltypessmall) x
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
tinyint
|
|
====
|
|
---- QUERY
|
|
# Select constant with unreferenced distinct aggregate in subquery
|
|
select 1 from (select count(distinct tinyint_col) from alltypessmall) x
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
tinyint
|
|
====
|
|
---- QUERY
|
|
# Select aggregate from unreferenced aggregate in subquery
|
|
select count(*) from (select count(*) from alltypessmall) x
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# Select * from aggregate in subquery
|
|
select * from (select count(*) from alltypessmall) x
|
|
---- RESULTS
|
|
100
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# Select from aliased aggregate in subquery
|
|
select c from (select count(*) c from alltypessmall) x
|
|
---- RESULTS
|
|
100
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# Select aggregate from aliased aggregate in subquery
|
|
select count(c) from (select count(*) c from alltypessmall) x
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# Select aggregate from aggregate of basetable column in subquery
|
|
select count(1) from (select count(tinyint_col) from alltypessmall) x
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# Select aggregate from aggregate in subquery with group by
|
|
select count(1) from
|
|
(select count(*) from alltypessmall group by tinyint_col) x
|
|
---- RESULTS
|
|
10
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-812
|
|
select bool_col, count(*) from
|
|
(select bool_col FROM ( SELECT bool_col FROM alltypessmall t ) t WHERE bool_col ) t
|
|
group by 1
|
|
---- RESULTS
|
|
true,50
|
|
---- TYPES
|
|
boolean, bigint
|
|
====
|
|
---- QUERY
|
|
# Test that tuple ids from semi-joined table refs are not added in an
|
|
# IsNullPredicate expr; inline view with left semi join between a base table
|
|
# and an inline view (IMPALA-1526)
|
|
select t1.int_col
|
|
from alltypestiny t1 left join
|
|
(select coalesce(t1.year, 384) as int_col
|
|
from alltypesagg t1
|
|
where t1.bigint_col in
|
|
(select day as int_col from alltypesagg where t1.id = day)) t2
|
|
on t2.int_col = t1.month
|
|
where t1.month is not null
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0
|
|
1
|
|
0
|
|
1
|
|
0
|
|
1
|
|
0
|
|
1
|
|
---- TYPES
|
|
INT
|
|
====
|
|
---- QUERY
|
|
# Test that tuple ids from semi-joined table refs are not added in
|
|
# an IsNullPredicate expr; inline view with right semi join between
|
|
# two inline views (IMPALA-1526)
|
|
select distinct t1.int_col
|
|
from alltypestiny t1 left join
|
|
(select coalesce(t3.int_col, 384) as int_col
|
|
from (select int_col from alltypes) t1
|
|
right semi join (select int_col from alltypesagg) t3 on t1.int_col = t3.int_col) t2
|
|
on t2.int_col = t1.month
|
|
where t1.month is not null
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0
|
|
1
|
|
---- TYPES
|
|
INT
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-1528
|
|
SELECT t2.int_col is NULL FROM alltypesagg t1
|
|
LEFT JOIN (SELECT COALESCE(76, -937, -981) AS int_col FROM alltypestiny) t2
|
|
ON (t2.int_col) = (t1.year) limit 1
|
|
---- RESULTS
|
|
true
|
|
---- TYPES
|
|
boolean
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1987: Equi-join predicates of outer joins contain TupleIsNullPredicate exprs.
|
|
select t1.int_col, t2.int_col, t3.id
|
|
from alltypestiny t1 left outer join
|
|
(select coalesce(int_col, 384) as int_col from alltypestiny) t2
|
|
on t1.int_col = t2.int_col
|
|
left outer join
|
|
(select 0 as id from alltypestiny) t3
|
|
on t1.int_col = t3.id
|
|
order by 1 limit 5
|
|
---- RESULTS
|
|
0,0,0
|
|
0,0,0
|
|
0,0,0
|
|
0,0,0
|
|
0,0,0
|
|
---- TYPES
|
|
INT,INT,TINYINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-10182: Nulls get eliminated with union-all for duplicate columns
|
|
select count(*) from (
|
|
select c1, c2 from (select tinyint_col c1, tinyint_col c2
|
|
from alltypesagg group by 1, 2) t1
|
|
group by 1, 2
|
|
union all
|
|
select c1, c2 from (select tinyint_col c1, tinyint_col c2
|
|
from alltypesagg group by 1, 2) t1
|
|
group by 1, 2) tt;
|
|
---- RESULTS
|
|
20
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-10182: Nulls get eliminated with union-all for duplicate columns
|
|
# Introduce nulls from the null producing side of left outer join
|
|
with dt1 as (select t2.int_col y from alltypessmall t1
|
|
left outer join alltypestiny t2 on t1.int_col = t2.int_col)
|
|
select * from (
|
|
select c1, c2 from (select dt1.y c1, dt1.y c2 from dt1 group by 1, 2) t1
|
|
group by 1, 2
|
|
union all
|
|
select c1, c2 from (select dt1.y c1, dt1.y c2 from dt1 group by 1, 2) t1
|
|
group by 1, 2) tt order by 1, 2;
|
|
---- RESULTS
|
|
0,0
|
|
0,0
|
|
1,1
|
|
1,1
|
|
NULL,NULL
|
|
NULL,NULL
|
|
---- TYPES
|
|
INT, INT
|
|
==== |