Files
impala/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test
Taras Bobrovytsky d0f838b66a IMPALA-6340,IMPALA-6518: Check that decimal types are compatible in FE
In this patch we implement strict decimal type checking in the FE in
various situations when DECIMAL_V2 is enabled. What is affected:
- Union. If we union two decimals and it is not possible to come up
  with a decimal that will be able to contain all the digits, an error
  is thrown. For example, the union(decimal(20, 10), decimal(20, 20))
  returns decimal(30, 20). However, for union(decimal(38, 0),
  decimal(38, 38)) the ideal return type would be decimal(76,38), but
  this is too large, so an error is thrown.
- Insert. If we are inserting a decimal value into a column where we are
  not guaranteed that all digits will fit, an error is thrown. For
  example, inserting a decimal(38,0) value into a decimal(38,38) column.
- Functions such as coalesce(). If we are unable to determine the output
  type that guarantees that all digits will fit from all the arguments,
  an error is thrown. For example,
  coalesce(decimal(38,38), decimal(38,0)) will throw an error.
- Hash Join. When joining on two decimals, if a type cannot be
  determined that both columns can be cast to, we throw an error.
  For example, join on decimal(38,0) and decimal(38,38) will result
  in an error.

To avoid these errors, you need to use CAST() on some of the decimals.

In this patch we also change the output decimal calculation of decimal
round, truncate and related functions. If these functions are a no-op,
the resulting decimal type is the same as the input type.

Testing:
- Ran a core build which passed.

Change-Id: Id406f4189e01a909152985fabd5cca7a1527a568
Reviewed-on: http://gerrit.cloudera.org:8080/9930
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2018-04-28 03:33:02 +00:00

275 lines
8.5 KiB
Plaintext

# Right outer joins with non-equi join predicates.
# Expected single-node PLAN: both joins stay NESTED LOOP JOIN [RIGHT OUTER JOIN].
# a.id < 10 is listed at scan 00 and again at join 04; c.bigint_col = 10 is listed
# at scan 02.
# Expected DISTRIBUTEDPLAN: a NotImplementedException naming RIGHT OUTER JOIN.
select straight_join *
from functional.alltypestiny a right outer join functional.alltypes b
on a.id != b.id or a.int_col < b.int_col
right outer join functional.alltypesagg c
on a.smallint_col >= c.smallint_col
where a.id < 10 and c.bigint_col = 10
---- PLAN
PLAN-ROOT SINK
|
04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
| join predicates: a.smallint_col >= c.smallint_col
| predicates: a.id < 10
|
|--02:SCAN HDFS [functional.alltypesagg c]
| partitions=11/11 files=11 size=814.73KB
| predicates: c.bigint_col = 10
|
03:NESTED LOOP JOIN [RIGHT OUTER JOIN]
| join predicates: a.id != b.id OR a.int_col < b.int_col
|
|--01:SCAN HDFS [functional.alltypes b]
| partitions=24/24 files=24 size=478.45KB
|
00:SCAN HDFS [functional.alltypestiny a]
partitions=4/4 files=4 size=460B
predicates: a.id < 10
---- DISTRIBUTEDPLAN
NotImplementedException: Error generating a valid execution plan for this query. A RIGHT OUTER JOIN type with no equi-join predicates can only be executed with a single node plan.
====
# Right semi joins with non-equi join predicates.
# Expected single-node PLAN: both joins stay NESTED LOOP JOIN [RIGHT SEMI JOIN], and
# d.bigint_col < 10 is listed at scan 02.
# Expected DISTRIBUTEDPLAN: a NotImplementedException naming RIGHT SEMI JOIN.
select straight_join *
from functional.alltypestiny a right semi join functional.alltypessmall c
on a.tinyint_col > c.tinyint_col
right semi join functional.alltypesagg d
on c.tinyint_col < d.bigint_col
where d.bigint_col < 10
---- PLAN
PLAN-ROOT SINK
|
04:NESTED LOOP JOIN [RIGHT SEMI JOIN]
| join predicates: c.tinyint_col < d.bigint_col
|
|--02:SCAN HDFS [functional.alltypesagg d]
| partitions=11/11 files=11 size=814.73KB
| predicates: d.bigint_col < 10
|
03:NESTED LOOP JOIN [RIGHT SEMI JOIN]
| join predicates: a.tinyint_col > c.tinyint_col
|
|--01:SCAN HDFS [functional.alltypessmall c]
| partitions=4/4 files=4 size=6.32KB
|
00:SCAN HDFS [functional.alltypestiny a]
partitions=4/4 files=4 size=460B
---- DISTRIBUTEDPLAN
NotImplementedException: Error generating a valid execution plan for this query. A RIGHT SEMI JOIN type with no equi-join predicates can only be executed with a single node plan.
====
# Full outer joins with non-equi join predicates.
# Expected single-node PLAN: three NESTED LOOP JOIN [FULL OUTER JOIN] nodes (04-06).
# Both WHERE conjuncts are listed as predicates of the top join 06; a.id < 10 is also
# listed at scan 00.
# Expected DISTRIBUTEDPLAN: a NotImplementedException naming FULL OUTER JOIN.
select straight_join *
from functional.alltypestiny a full outer join functional.alltypessmall b
on a.id != b.id or a.int_col != b.int_col
full outer join functional.alltypesagg c
on a.tinyint_col > c.tinyint_col
full outer join functional.alltypes d
on c.int_col > d.int_col
where a.bigint_col != c.bigint_col and a.id < 10
---- PLAN
PLAN-ROOT SINK
|
06:NESTED LOOP JOIN [FULL OUTER JOIN]
| join predicates: c.int_col > d.int_col
| predicates: a.bigint_col != c.bigint_col, a.id < 10
|
|--03:SCAN HDFS [functional.alltypes d]
| partitions=24/24 files=24 size=478.45KB
|
05:NESTED LOOP JOIN [FULL OUTER JOIN]
| join predicates: a.tinyint_col > c.tinyint_col
|
|--02:SCAN HDFS [functional.alltypesagg c]
| partitions=11/11 files=11 size=814.73KB
|
04:NESTED LOOP JOIN [FULL OUTER JOIN]
| join predicates: a.id != b.id OR a.int_col != b.int_col
|
|--01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
|
00:SCAN HDFS [functional.alltypestiny a]
partitions=4/4 files=4 size=460B
predicates: a.id < 10
---- DISTRIBUTEDPLAN
NotImplementedException: Error generating a valid execution plan for this query. A FULL OUTER JOIN type with no equi-join predicates can only be executed with a single node plan.
====
# Right anti join with non-equi join predicates.
# Expected single-node PLAN: NESTED LOOP JOIN [RIGHT ANTI JOIN] under a finalizing
# count(*) aggregate, with b.int_col = 5 listed at scan 01.
# Expected DISTRIBUTEDPLAN: a NotImplementedException naming RIGHT ANTI JOIN.
select straight_join count(*)
from functional.alltypestiny a right anti join functional.alltypessmall b
on a.id < b.id
where b.int_col = 5
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
|
02:NESTED LOOP JOIN [RIGHT ANTI JOIN]
| join predicates: a.id < b.id
|
|--01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
| predicates: b.int_col = 5
|
00:SCAN HDFS [functional.alltypestiny a]
partitions=4/4 files=4 size=460B
---- DISTRIBUTEDPLAN
NotImplementedException: Error generating a valid execution plan for this query. A RIGHT ANTI JOIN type with no equi-join predicates can only be executed with a single node plan.
====
# Inner and right joins with non-equi join predicates.
# Expected single-node PLAN: joins 05-08 appear in the written order as INNER,
# RIGHT OUTER, RIGHT SEMI and RIGHT ANTI nested loop joins. The inner join condition
# a.id < b.id is listed under "predicates", not "join predicates".
# Expected DISTRIBUTEDPLAN: a NotImplementedException naming RIGHT ANTI JOIN.
select straight_join count(*)
from functional.alltypestiny a inner join functional.alltypessmall b on a.id < b.id
right outer join functional.alltypesagg c on a.int_col != c.int_col
right semi join functional.alltypes d on c.tinyint_col < d.tinyint_col
right anti join functional.alltypesnopart e on d.tinyint_col > e.tinyint_col
where e.id < 10
---- PLAN
PLAN-ROOT SINK
|
09:AGGREGATE [FINALIZE]
| output: count(*)
|
08:NESTED LOOP JOIN [RIGHT ANTI JOIN]
| join predicates: d.tinyint_col > e.tinyint_col
|
|--04:SCAN HDFS [functional.alltypesnopart e]
| partitions=1/1 files=0 size=0B
| predicates: e.id < 10
|
07:NESTED LOOP JOIN [RIGHT SEMI JOIN]
| join predicates: c.tinyint_col < d.tinyint_col
|
|--03:SCAN HDFS [functional.alltypes d]
| partitions=24/24 files=24 size=478.45KB
|
06:NESTED LOOP JOIN [RIGHT OUTER JOIN]
| join predicates: a.int_col != c.int_col
|
|--02:SCAN HDFS [functional.alltypesagg c]
| partitions=11/11 files=11 size=814.73KB
|
05:NESTED LOOP JOIN [INNER JOIN]
| predicates: a.id < b.id
|
|--01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
|
00:SCAN HDFS [functional.alltypestiny a]
partitions=4/4 files=4 size=460B
---- DISTRIBUTEDPLAN
NotImplementedException: Error generating a valid execution plan for this query. A RIGHT ANTI JOIN type with no equi-join predicates can only be executed with a single node plan.
====
# Right semi and outer joins are inverted to make them executable.
# Same query as above but without the straight join hint.
# Only the DISTRIBUTEDPLAN is checked. It shows joins 06, 07 and 08 as LEFT OUTER,
# LEFT SEMI and LEFT ANTI nested loop joins, each marked BROADCAST, with scan 04
# (alias e) as the left-most input and e.id < 10 listed at that scan.
select count(*)
from functional.alltypestiny a inner join functional.alltypessmall b on a.id < b.id
right outer join functional.alltypesagg c on a.int_col != c.int_col
right semi join functional.alltypes d on c.tinyint_col < d.tinyint_col
right anti join functional.alltypesnopart e on d.tinyint_col > e.tinyint_col
where e.id < 10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
15:AGGREGATE [FINALIZE]
| output: count:merge(*)
|
14:EXCHANGE [UNPARTITIONED]
|
09:AGGREGATE
| output: count(*)
|
08:NESTED LOOP JOIN [LEFT ANTI JOIN, BROADCAST]
| join predicates: d.tinyint_col > e.tinyint_col
|
|--13:EXCHANGE [BROADCAST]
| |
| 07:NESTED LOOP JOIN [LEFT SEMI JOIN, BROADCAST]
| | join predicates: c.tinyint_col < d.tinyint_col
| |
| |--12:EXCHANGE [BROADCAST]
| | |
| | 06:NESTED LOOP JOIN [LEFT OUTER JOIN, BROADCAST]
| | | join predicates: a.int_col != c.int_col
| | |
| | |--11:EXCHANGE [BROADCAST]
| | | |
| | | 05:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
| | | | predicates: a.id < b.id
| | | |
| | | |--10:EXCHANGE [BROADCAST]
| | | | |
| | | | 00:SCAN HDFS [functional.alltypestiny a]
| | | | partitions=4/4 files=4 size=460B
| | | |
| | | 01:SCAN HDFS [functional.alltypessmall b]
| | | partitions=4/4 files=4 size=6.32KB
| | |
| | 02:SCAN HDFS [functional.alltypesagg c]
| | partitions=11/11 files=11 size=814.73KB
| |
| 03:SCAN HDFS [functional.alltypes d]
| partitions=24/24 files=24 size=478.45KB
|
04:SCAN HDFS [functional.alltypesnopart e]
partitions=1/1 files=0 size=0B
predicates: e.id < 10
====
# IMPALA-5689: Do not invert a left outer join with no equi-join predicates.
# The ON clause references only t2 (t2.bigint_col=5). The expected DISTRIBUTEDPLAN
# keeps join 03 as LEFT OUTER JOIN, BROADCAST, lists no join predicates on it, and
# shows the t2 condition as a predicate of scan 02 on the broadcast side.
select * from (
select distinct int_col
from functional.alltypes) t1
left outer join functional.alltypes t2 on (t2.bigint_col=5)
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:NESTED LOOP JOIN [LEFT OUTER JOIN, BROADCAST]
|
|--06:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [functional.alltypes t2]
| partitions=24/24 files=24 size=478.45KB
| predicates: (t2.bigint_col = 5)
|
05:AGGREGATE [FINALIZE]
| group by: int_col
|
04:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE [STREAMING]
| group by: int_col
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# IMPALA-5689: Do not invert a left semi join with no equi-join predicates.
# The ON clause references only t2 (t2.bigint_col=5). The expected DISTRIBUTEDPLAN
# keeps join 03 as LEFT SEMI JOIN, BROADCAST, lists no join predicates on it, and
# shows the t2 condition as a predicate of scan 02 on the broadcast side.
select * from (
select distinct int_col
from functional.alltypes) t1
left semi join functional.alltypes t2 on (t2.bigint_col=5)
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:NESTED LOOP JOIN [LEFT SEMI JOIN, BROADCAST]
|
|--06:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [functional.alltypes t2]
| partitions=24/24 files=24 size=478.45KB
| predicates: (t2.bigint_col = 5)
|
05:AGGREGATE [FINALIZE]
| group by: int_col
|
04:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE [STREAMING]
| group by: int_col
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====