mirror of
https://github.com/apache/impala.git
synced 2026-01-07 00:02:28 -05:00
This patch adds the ability for operators to compute and forward bitmap filters from one operator to another, across fragment and machine boundaries. Filters are provided as part of the plan from the frontend. In this patch hash join nodes produce filters from their build side input, and propagate them, via the query's coordinator, to the scan nodes which provide the probe side for that join. The scan nodes may then filter their rows before they are sent to the join, reducing the amount of work the join has to do. Filters are attached to the local RuntimeState's RuntimeFilterBank by the join node. When complete, they are asynchronously sent to the coordinator via a new UpdateFilter() RPC. The coordinator maintains a routing table that maps incoming filters to their recipient backends. For partitioned joins, the filters must be aggregated from all providers. The coordinator performs this aggregation and transmits the completed filter only when all inputs have been received. In this patch, filtering can occur in up to four places in a scan: 1. Before initial scan ranges are issued (all file formats, partition columns only) 2. Before each scan range is processed (all file formats, partition columns only) 3. Before a row group is processed (Parquet, partition columns only) 4. During assembly of every row (Parquet, any column) This patch also replaces the existing bitmap-based filters with Bloom Filter based ones. The Bloom Filters are statically sized to have an expected false positive rate of 0.1 on 2^20 distinct items. This yields Bloom Filters of 1MB in size. This is configurable by setting --bloom_filter_size, and we will perform tests to determine a good default. The query option RUNTIME_BLOOM_FILTER_SIZE can override the command-line flag on a per-query basis. This patch also simplifies and improves the memory handling for allocated filters by the RuntimeFilterBank. 
New filters are tracked through the query memory tracker, and owned by the fragment instance's RuntimeState::obj_pool(). This patch also adds a simple heuristic to disable filter creation based on estimated false-positive rate for the Bloom Filter. By default the maximum FP rate is set to 75%. It can be controlled by setting --max_filter_error_rate. Finally, this patch adds short-circuit publication for filters that are broadcast, and does so always even when distributed runtime filter propagation is disabled. To avoid cross-compilation problems, bloom-filter.h was rewritten in C++98. Change-Id: Icea03a87cf1705c1b4aa46f86f13141c4b58da10 Reviewed-on: http://gerrit.cloudera.org:8080/1861 Reviewed-by: Henry Robinson <henry@cloudera.com> Tested-by: Henry Robinson <henry@cloudera.com>
266 lines
6.3 KiB
Plaintext
266 lines
6.3 KiB
Plaintext
select * from functional_seq.alltypes t1 limit 5
|
|
---- PLAN
|
|
00:SCAN HDFS [functional_seq.alltypes t1]
|
|
partitions=24/24 files=24 size=562.59KB
|
|
limit: 5
|
|
---- DISTRIBUTEDPLAN
|
|
00:SCAN HDFS [functional_seq.alltypes t1]
|
|
partitions=24/24 files=24 size=562.59KB
|
|
limit: 5
|
|
====
|
|
# Query exceeds the 8-row limit for the small query optimization, so it is distributed
|
|
select * from functional.alltypes t1 limit 10
|
|
---- PLAN
|
|
00:SCAN HDFS [functional.alltypes t1]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
limit: 10
|
|
---- DISTRIBUTEDPLAN
|
|
01:EXCHANGE [UNPARTITIONED]
|
|
| limit: 10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t1]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
limit: 10
|
|
====
|
|
# Query is optimized, run on coordinator only
|
|
select * from functional.alltypes t1 limit 5
|
|
---- PLAN
|
|
00:SCAN HDFS [functional.alltypes t1]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
limit: 5
|
|
---- DISTRIBUTEDPLAN
|
|
00:SCAN HDFS [functional.alltypes t1]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
limit: 5
|
|
====
|
|
# If a predicate is applied the optimization is disabled
|
|
select * from functional.alltypes t1 where t1.id < 99 limit 5
|
|
---- PLAN
|
|
00:SCAN HDFS [functional.alltypes t1]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: t1.id < 99
|
|
limit: 5
|
|
---- DISTRIBUTEDPLAN
|
|
01:EXCHANGE [UNPARTITIONED]
|
|
| limit: 5
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t1]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: t1.id < 99
|
|
limit: 5
|
|
====
|
|
# No optimization for hbase tables when a predicate is applied
|
|
select * from functional_hbase.alltypes t1 where t1.id < 99 limit 5
|
|
---- PLAN
|
|
00:SCAN HBASE [functional_hbase.alltypes t1]
|
|
predicates: t1.id < 99
|
|
limit: 5
|
|
---- DISTRIBUTEDPLAN
|
|
01:EXCHANGE [UNPARTITIONED]
|
|
| limit: 5
|
|
|
|
|
00:SCAN HBASE [functional_hbase.alltypes t1]
|
|
predicates: t1.id < 99
|
|
limit: 5
|
|
====
|
|
# Applies optimization for small queries in hbase
|
|
select * from functional_hbase.alltypes t1 limit 5
|
|
---- PLAN
|
|
00:SCAN HBASE [functional_hbase.alltypes t1]
|
|
limit: 5
|
|
---- DISTRIBUTEDPLAN
|
|
00:SCAN HBASE [functional_hbase.alltypes t1]
|
|
limit: 5
|
|
====
|
|
insert into
|
|
functional_hbase.alltypes
|
|
values (1, 1, true, "1999-12-01", 2.0, 1.0, 1, 12, 2, "abs",
|
|
cast(now() as timestamp), 1, 1999)
|
|
---- PLAN
|
|
WRITE TO HBASE table=functional_hbase.alltypes
|
|
|
|
|
00:UNION
|
|
constant-operands=1
|
|
---- DISTRIBUTEDPLAN
|
|
WRITE TO HBASE table=functional_hbase.alltypes
|
|
|
|
|
00:UNION
|
|
constant-operands=1
|
|
====
|
|
create table tm as select * from functional_hbase.alltypes limit 5
|
|
---- PLAN
|
|
WRITE TO HDFS [default.tm, OVERWRITE=false]
|
|
| partitions=1
|
|
|
|
|
00:SCAN HBASE [functional_hbase.alltypes]
|
|
limit: 5
|
|
---- DISTRIBUTEDPLAN
|
|
WRITE TO HDFS [default.tm, OVERWRITE=false]
|
|
| partitions=1
|
|
|
|
|
00:SCAN HBASE [functional_hbase.alltypes]
|
|
limit: 5
|
|
====
|
|
create table tm as select * from functional_hbase.alltypes limit 50
|
|
---- PLAN
|
|
WRITE TO HDFS [default.tm, OVERWRITE=false]
|
|
| partitions=1
|
|
|
|
|
00:SCAN HBASE [functional_hbase.alltypes]
|
|
limit: 50
|
|
---- DISTRIBUTEDPLAN
|
|
WRITE TO HDFS [default.tm, OVERWRITE=false]
|
|
| partitions=1
|
|
|
|
|
01:EXCHANGE [UNPARTITIONED]
|
|
| limit: 50
|
|
|
|
|
00:SCAN HBASE [functional_hbase.alltypes]
|
|
limit: 50
|
|
====
|
|
select * from functional_hbase.alltypes limit 5
|
|
union all
|
|
select * from functional_hbase.alltypes limit 2
|
|
---- PLAN
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HBASE [functional_hbase.alltypes]
|
|
| limit: 2
|
|
|
|
|
01:SCAN HBASE [functional_hbase.alltypes]
|
|
limit: 5
|
|
---- DISTRIBUTEDPLAN
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HBASE [functional_hbase.alltypes]
|
|
| limit: 2
|
|
|
|
|
01:SCAN HBASE [functional_hbase.alltypes]
|
|
limit: 5
|
|
====
|
|
select * from functional_hbase.alltypes limit 5
|
|
union all
|
|
select * from functional_hbase.alltypes limit 5
|
|
---- PLAN
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HBASE [functional_hbase.alltypes]
|
|
| limit: 5
|
|
|
|
|
01:SCAN HBASE [functional_hbase.alltypes]
|
|
limit: 5
|
|
---- DISTRIBUTEDPLAN
|
|
00:UNION
|
|
|
|
|
|--04:EXCHANGE [UNPARTITIONED]
|
|
| | limit: 5
|
|
| |
|
|
| 02:SCAN HBASE [functional_hbase.alltypes]
|
|
| limit: 5
|
|
|
|
|
03:EXCHANGE [UNPARTITIONED]
|
|
| limit: 5
|
|
|
|
|
01:SCAN HBASE [functional_hbase.alltypes]
|
|
limit: 5
|
|
====
|
|
# Two scans cannot run in the same fragment. IMPALA-561
|
|
select * from
|
|
functional.testtbl a join functional.testtbl b on a.id = b.id
|
|
---- DISTRIBUTEDPLAN
|
|
04:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: a.id = b.id
|
|
| runtime filters: RF000 <- b.id
|
|
|
|
|
|--03:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 01:SCAN HDFS [functional.testtbl b]
|
|
| partitions=1/1 files=0 size=0B
|
|
|
|
|
00:SCAN HDFS [functional.testtbl a]
|
|
partitions=1/1 files=0 size=0B
|
|
runtime filters: RF000 -> a.id
|
|
====
|
|
select * from
|
|
functional.testtbl a, functional.testtbl b
|
|
---- DISTRIBUTEDPLAN
|
|
04:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
|
|
|
|
|
|--03:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 01:SCAN HDFS [functional.testtbl b]
|
|
| partitions=1/1 files=0 size=0B
|
|
|
|
|
00:SCAN HDFS [functional.testtbl a]
|
|
partitions=1/1 files=0 size=0B
|
|
====
|
|
select * from
|
|
functional.alltypestiny a
|
|
where a.id in (select id from functional.alltypestiny limit 5) limit 5
|
|
---- DISTRIBUTEDPLAN
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
| limit: 5
|
|
|
|
|
02:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
|
|
| hash predicates: a.id = id
|
|
| runtime filters: RF000 <- id
|
|
| limit: 5
|
|
|
|
|
|--04:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 03:EXCHANGE [UNPARTITIONED]
|
|
| | limit: 5
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 files=4 size=460B
|
|
| limit: 5
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny a]
|
|
partitions=4/4 files=4 size=460B
|
|
runtime filters: RF000 -> a.id
|
|
====
|
|
# Test correct single-node planning for mixed union distinct/all (IMPALA-1553).
|
|
select
|
|
id, bool_col
|
|
from functional.alltypestiny a
|
|
where year=2009 and month=1
|
|
union distinct
|
|
select id, bool_col
|
|
from functional.alltypestiny b
|
|
where year=2009 and month=1
|
|
union all
|
|
select id, bool_col
|
|
from functional.alltypestiny c
|
|
where year=2009 and month=2
|
|
---- DISTRIBUTEDPLAN
|
|
04:UNION
|
|
|
|
|
|--03:AGGREGATE [FINALIZE]
|
|
| | group by: id, bool_col
|
|
| |
|
|
| 00:UNION
|
|
| |
|
|
| |--02:SCAN HDFS [functional.alltypestiny b]
|
|
| | partitions=1/4 files=1 size=115B
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypestiny a]
|
|
| partitions=1/4 files=1 size=115B
|
|
|
|
|
05:SCAN HDFS [functional.alltypestiny c]
|
|
partitions=1/4 files=1 size=115B
|
|
====
|
|
# IMPALA-2527: Tests that the small query optimization is disabled for collection types
|
|
select key from functional.allcomplextypes.map_map_col.value limit 5;
|
|
---- DISTRIBUTEDPLAN
|
|
01:EXCHANGE [UNPARTITIONED]
|
|
| limit: 5
|
|
|
|
|
00:SCAN HDFS [functional.allcomplextypes.map_map_col.value]
|
|
partitions=0/0 files=0 size=0B
|
|
limit: 5
|
|
====
|