From 13a3d19a2c2e73ec794083db52eae20fc4ea3f84 Mon Sep 17 00:00:00 2001 From: Fang-Yu Rao Date: Mon, 5 Aug 2024 16:44:16 -0700 Subject: [PATCH] IMPALA-13250: [DOCS] Document ENABLED_RUNTIME_FILTER_TYPES query option This patch documents the ENABLED_RUNTIME_FILTER_TYPES query option based on the respective code comments in ImpalaService.thrift and query-options.cc. Change-Id: Ib7a34782bed6f812fedf717d8a076e2706f0bba9 Reviewed-on: http://gerrit.cloudera.org:8080/21645 Tested-by: Impala Public Jenkins Reviewed-by: Quanlong Huang --- docs/impala.ditamap | 1 + docs/shared/ImpalaVariables.xml | 1 + docs/shared/impala_common.xml | 19 +++- .../impala_enabled_runtime_filter_types.xml | 89 +++++++++++++++++++ 4 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 docs/topics/impala_enabled_runtime_filter_types.xml diff --git a/docs/impala.ditamap b/docs/impala.ditamap index e896229f3..bab560a6e 100644 --- a/docs/impala.ditamap +++ b/docs/impala.ditamap @@ -241,6 +241,7 @@ under the License. + diff --git a/docs/shared/ImpalaVariables.xml b/docs/shared/ImpalaVariables.xml index 5caf28907..d7236f06d 100644 --- a/docs/shared/ImpalaVariables.xml +++ b/docs/shared/ImpalaVariables.xml @@ -42,6 +42,7 @@ under the License. The docs included with a distro can refer to the distro release number by editing the values here.
    +
  • Impala 4.0
  • Impala 3.4
  • Impala 3.3
  • Impala 3.2
  • diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml index 27eca96a7..091f6c85d 100644 --- a/docs/shared/impala_common.xml +++ b/docs/shared/impala_common.xml @@ -1524,7 +1524,20 @@ alter table partitioned_data set tblproperties ('numRows'='1030000', 'STATS_GENE this animated demo.

    - + +

    + The Impala backend expects comma-separated values to be in quotes when executing the + SET statement. + This is usually the case when running a SET statement like + SET ENABLED_RUNTIME_FILTER_TYPES="value1,value2" using a JDBC + driver. When using the Impala-shell client, the SET statement is not + executed immediately; instead, query options are updated in the client and applied as + part of the following statement, so no quotes are required for Impala-shell. That + is, use a SET statement like + SET ENABLED_RUNTIME_FILTER_TYPES=value1,value2 when + submitting the query to the Impala backend via the Impala-shell client. +

    +

    Because the runtime filtering feature is enabled by default only for local processing, the other filtering-related query options have @@ -3419,6 +3432,10 @@ flight_num: INT32 SNAPPY DO:83456393 FPO:83488603 SZ:10216514/11474301 needed to represent each value.

    +

    + Added in: +

    +

    Added in:

    diff --git a/docs/topics/impala_enabled_runtime_filter_types.xml b/docs/topics/impala_enabled_runtime_filter_types.xml new file mode 100644 index 000000000..8faf6fbf4 --- /dev/null +++ b/docs/topics/impala_enabled_runtime_filter_types.xml @@ -0,0 +1,89 @@ + + + + + + ENABLED_RUNTIME_FILTER_TYPES Query Option (<keyword keyref="impala40"/> or higher only) + ENABLED RUNTIME FILTER TYPES + + + + + + + + + + + + +

    + ENABLED_RUNTIME_FILTER_TYPES query option + The ENABLED_RUNTIME_FILTER_TYPES query option + sets the runtime filter types that are enabled to be applied to scanners. + This option only applies to HDFS scan nodes and Kudu scan nodes. + The following types are supported. + Specify the enabled types as a comma-separated list of the following values, + or enable all types with "ALL". +

      +
    • + BLOOM +
    • +
    • + MIN_MAX +
    • +
    • + IN_LIST +
    • +
    +

    + +

    + Default: "BLOOM,MIN_MAX" +

    +

    + +

    + +

    + +

    + +

    + Depending on the scan node type, the planner can schedule compatible runtime filter + types as follows. +

      +
    • Kudu scan: BLOOM, MIN_MAX
    • +
    • + HDFS scan on Parquet files: BLOOM, MIN_MAX +
    • +
    • HDFS scan on ORC files: BLOOM, IN_LIST
    • +
    • HDFS scan on other kinds of files: BLOOM
    • +
    +

    + +

    +

    + , + +

    + +
    +