mirror of
https://github.com/apache/impala.git
synced 2026-01-04 09:00:56 -05:00
Change-Id: I4e6080a2b196926e46b1e641f6530ba1fa9bd444 Reviewed-on: http://gerrit.cloudera.org:8080/8577 Reviewed-by: Sailesh Mukil <sailesh@cloudera.com> Tested-by: Impala Public Jenkins
3842 lines
168 KiB
XML
3842 lines
168 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!--
|
|
Licensed to the Apache Software Foundation (ASF) under one
|
|
or more contributor license agreements. See the NOTICE file
|
|
distributed with this work for additional information
|
|
regarding copyright ownership. The ASF licenses this file
|
|
to you under the Apache License, Version 2.0 (the
|
|
"License"); you may not use this file except in compliance
|
|
with the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing,
|
|
software distributed under the License is distributed on an
|
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
KIND, either express or implied. See the License for the
|
|
specific language governing permissions and limitations
|
|
under the License.
|
|
-->
|
|
<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
|
|
<concept rev="ver" id="new_features">
|
|
|
|
<title><ph audience="standalone">New Features in Apache Impala</ph><ph audience="integrated">What's New in Apache Impala</ph></title>
|
|
|
|
<prolog>
|
|
<metadata>
|
|
<data name="Category" value="Impala"/>
|
|
<data name="Category" value="Release Notes"/>
|
|
<data name="Category" value="New Features"/>
|
|
<data name="Category" value="What's New"/>
|
|
<data name="Category" value="Getting Started"/>
|
|
<data name="Category" value="Upgrading"/>
|
|
<data name="Category" value="Administrators"/>
|
|
<data name="Category" value="Developers"/>
|
|
<data name="Category" value="Data Analysts"/>
|
|
</metadata>
|
|
</prolog>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
This release of Impala contains the following changes and enhancements from previous releases.
|
|
</p>
|
|
|
|
<p outputclass="toc inpage"/>
|
|
|
|
</conbody>
|
|
|
|
<!-- All 2.10.x new features go under here -->
|
|
|
|
<concept rev="2.10.0" id="new_features_2100">
|
|
|
|
<title>New Features in <keyword keyref="impala210_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
For the full list of issues closed in this release, including the issues
|
|
marked as <q>new features</q> or <q>improvements</q>, see the
|
|
<xref keyref="changelog_210">changelog for <keyword keyref="impala210"/></xref>.
|
|
</p>
|
|
|
|
</conbody>
|
|
</concept>
|
|
|
|
<!-- All 2.9.x new features go under here -->
|
|
|
|
<concept rev="2.9.0" id="new_features_290">
|
|
|
|
<title>New Features in <keyword keyref="impala29_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
For the full list of issues closed in this release, including the issues
|
|
marked as <q>new features</q> or <q>improvements</q>, see the
|
|
<xref keyref="changelog_29">changelog for <keyword keyref="impala29"/></xref>.
|
|
</p>
|
|
|
|
<p>
|
|
The following are some of the most significant new features in this release:
|
|
</p>
|
|
|
|
<ul id="feature_list">
|
|
<li>
|
|
<p rev="IMPALA-4729">
|
|
A new function, <codeph>replace()</codeph>, which is faster than
|
|
<codeph>regexp_replace()</codeph> for simple string substitutions.
|
|
See <xref keyref="string_functions"/> for details.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="2.9.0 IMPALA-3807 IMPALA-5147 IMPALA-5503">
|
|
Startup flags for the <cmdname>impalad</cmdname> daemon, <codeph>is_executor</codeph>
|
|
and <codeph>is_coordinator</codeph>, let you divide the work on a large, busy cluster
|
|
between a small number of hosts acting as query coordinators, and a larger number of
|
|
hosts acting as query executors. By default, each host can act in both roles,
|
|
potentially introducing bottlenecks during heavily concurrent workloads.
|
|
See <xref keyref="scalability_coordinator"/> for details.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
</concept>
|
|
|
|
<!-- All 2.8.x new features go under here -->
|
|
|
|
<concept rev="2.8.0" id="new_features_280">
|
|
|
|
<title>New Features in <keyword keyref="impala28_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<ul id="feature_list">
|
|
<li>
|
|
<p>
|
|
Performance and scalability improvements:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p rev="IMPALA-4572">
|
|
The <codeph>COMPUTE STATS</codeph> statement can
|
|
take advantage of multithreading.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4135">
|
|
Improved scalability for highly concurrent loads by reducing the possibility of TCP/IP timeouts.
|
|
A configuration setting, <codeph>accepted_cnxn_queue_depth</codeph>, can be adjusted upwards to
|
|
avoid this type of timeout on large clusters.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Several performance improvements were made to the mechanism for generating native code:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p rev="IMPALA-3638">
|
|
Some queries involving analytic functions can take better advantage of native code generation.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4008">
|
|
Modules produced during intermediate code generation are organized
|
|
to be easier to cache and reuse during the lifetime of a long-running or complicated query.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4397 IMPALA-1430">
|
|
The <codeph>COMPUTE STATS</codeph> statement is more efficient
|
|
(less time for the codegen phase) for tables with a large number
|
|
of columns, especially for tables containing <codeph>TIMESTAMP</codeph>
|
|
columns.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3838 IMPALA-4495">
|
|
The logic for determining whether or not to use a runtime filter is more reliable, and the
|
|
evaluation process itself is faster because of native code generation.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3902">
|
|
The <codeph>MT_DOP</codeph> query option enables
|
|
multithreading for a number of Impala operations.
|
|
<codeph>COMPUTE STATS</codeph> statements for Parquet tables
|
|
use a default of <codeph>MT_DOP=4</codeph> to improve the
|
|
intra-node parallelism and CPU efficiency of this data-intensive
|
|
operation.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4397">
|
|
The <codeph>COMPUTE STATS</codeph> statement is more efficient
|
|
(less time for the codegen phase) for tables with a large number
|
|
of columns.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2521">
|
|
A new hint, <codeph>CLUSTERED</codeph>,
|
|
allows Impala <codeph>INSERT</codeph> operations on a Parquet table
|
|
that use dynamic partitioning to process a high number of
|
|
partitions in a single statement. The data is ordered based on the
|
|
partition key columns, and each partition is only written
|
|
by a single host, reducing the amount of memory needed to buffer
|
|
Parquet data while the data blocks are being constructed.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3552">
|
|
The new configuration setting <codeph>inc_stats_size_limit_bytes</codeph>
|
|
lets you reduce the load on the catalog server when running the
|
|
<codeph>COMPUTE INCREMENTAL STATS</codeph> statement for very large tables.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1788">
|
|
Impala folds many constant expressions within query statements,
|
|
rather than evaluating them for each row. This optimization
|
|
is especially useful when using functions to manipulate and
|
|
format <codeph>TIMESTAMP</codeph> values, for example the result
|
|
of an expression such as <codeph>to_date(now() - interval 1 day)</codeph>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4529">
|
|
Parsing of complicated expressions is faster. This speedup is
|
|
especially useful for queries containing large <codeph>CASE</codeph>
|
|
expressions.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4302">
|
|
Evaluation is faster for <codeph>IN</codeph> operators with many constant
|
|
arguments. The same performance improvement applies to other functions
|
|
with many constant arguments.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1286">
|
|
Impala optimizes identical comparison operators within multiple <codeph>OR</codeph>
|
|
blocks.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4193 IMPALA-3342">
|
|
The reporting for wall-clock times and total CPU time in profile output is more accurate.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3671">
|
|
A new query option, <codeph>SCRATCH_LIMIT</codeph>, lets you restrict the amount of
|
|
space used when a query exceeds the memory limit and activates the <q>spill to disk</q> mechanism.
|
|
This option helps to avoid runaway queries or make queries <q>fail fast</q> if they require more
|
|
memory than anticipated. You can prevent runaway queries from using excessive amounts of spill space,
|
|
without restarting the cluster to turn the spilling feature off entirely.
|
|
See <xref href="impala_scratch_limit.xml#scratch_limit"/> for details.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Integration with Apache Kudu:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p rev="">
|
|
The experimental Impala support for the Kudu storage layer has been folded
|
|
into the main Impala development branch. Impala can now directly access Kudu tables,
|
|
opening up new capabilities such as enhanced DML operations and continuous ingestion.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="">
|
|
The <codeph>DELETE</codeph> statement is a flexible way to remove data from a Kudu table. Previously,
|
|
removing data from an Impala table involved removing or rewriting the underlying data files, dropping entire partitions,
|
|
or rewriting the entire table. This Impala statement only works for Kudu tables.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="">
|
|
The <codeph>UPDATE</codeph> statement is a flexible way to modify data within a Kudu table. Previously,
|
|
updating data in an Impala table involved replacing the underlying data files, dropping entire partitions,
|
|
or rewriting the entire table. This Impala statement only works for Kudu tables.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3725">
|
|
The <codeph>UPSERT</codeph> statement is a flexible way to ingest data, modify data, or both, within a Kudu table. Previously,
|
|
ingesting data that might contain duplicates involved an inefficient multi-stage operation, and there was no
|
|
built-in protection against duplicate data. The <codeph>UPSERT</codeph> statement, in combination with
|
|
the primary key designation for Kudu tables, lets you add or replace rows in a single operation, and
|
|
automatically avoids creating any duplicate data.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3719 IMPALA-3726">
|
|
The <codeph>CREATE TABLE</codeph> statement gains some new clauses that are specific to Kudu tables:
|
|
<codeph>PARTITION BY</codeph>, <codeph>PARTITIONS</codeph>, <codeph>STORED AS KUDU</codeph>, and column
|
|
attributes <codeph>PRIMARY KEY</codeph>, <codeph>NULL</codeph> and <codeph>NOT NULL</codeph>,
|
|
<codeph>ENCODING</codeph>, <codeph>COMPRESSION</codeph>, <codeph>DEFAULT</codeph>, and <codeph>BLOCK_SIZE</codeph>.
|
|
These clauses replace the explicit <codeph>TBLPROPERTIES</codeph> settings that were required in the
|
|
early experimental phases of integration between Impala and Kudu.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2890">
|
|
The <codeph>ALTER TABLE</codeph> statement can change certain attributes of Kudu tables.
|
|
You can add, drop, or rename columns.
|
|
You can add or drop range partitions.
|
|
You can change the <codeph>TBLPROPERTIES</codeph> value to rename or point to a different underlying Kudu table,
|
|
independently from the Impala table name in the metastore database.
|
|
You cannot change the data type of an existing column in a Kudu table.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4403">
|
|
The <codeph>SHOW PARTITIONS</codeph> statement displays information about the distribution of data
|
|
between partitions in Kudu tables. A new variation, <codeph>SHOW RANGE PARTITIONS</codeph>,
|
|
displays information about the Kudu-specific partitions that apply across ranges of key values.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4379">
|
|
Not all Impala data types are supported in Kudu tables. In particular, currently the Impala
|
|
<codeph>TIMESTAMP</codeph> type is not allowed in a Kudu table. Impala does not recognize the
|
|
<codeph>UNIXTIME_MICROS</codeph> Kudu type when it is present in a Kudu table. (These two
|
|
representations of date/time data use different units and are not directly compatible.)
|
|
You cannot create columns of type <codeph>TIMESTAMP</codeph>, <codeph>DECIMAL</codeph>,
|
|
<codeph>VARCHAR</codeph>, or <codeph>CHAR</codeph> within a Kudu table. Within a query, you can
|
|
cast values in a result set to these types. Certain types, such as <codeph>BOOLEAN</codeph>,
|
|
cannot be used as primary key columns.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="">
|
|
Currently, Kudu tables are not interchangeable between Impala and Hive the way other kinds of Impala tables are.
|
|
Although the metadata for Kudu tables is stored in the metastore database, currently Hive cannot access Kudu tables.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="">
|
|
The <codeph>INSERT</codeph> statement works for Kudu tables. The organization
|
|
of the Kudu data makes it more efficient than with HDFS-backed tables to insert
|
|
data in small batches, such as with the <codeph>INSERT ... VALUES</codeph> syntax.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4283">
|
|
Some audit data is recorded for data governance purposes.
|
|
All <codeph>UPDATE</codeph>, <codeph>DELETE</codeph>, and <codeph>UPSERT</codeph> statements are characterized
|
|
as <codeph>INSERT</codeph> operations in the audit log. Currently, lineage metadata is not generated for
|
|
<codeph>UPDATE</codeph> and <codeph>DELETE</codeph> operations on Kudu tables.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4000">
|
|
Currently, Kudu tables have limited support for Sentry:
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
Access to Kudu tables must be granted to roles as usual.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Currently, access to a Kudu table through Sentry is <q>all or nothing</q>.
|
|
You cannot enforce finer-grained permissions such as at the column level,
|
|
or permissions on certain operations such as <codeph>INSERT</codeph>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Only users with <codeph>ALL</codeph> privileges on <codeph>SERVER</codeph> can create external Kudu tables.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
Because non-SQL APIs can access Kudu data without going through Sentry
|
|
authorization, currently the Sentry support is considered preliminary.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4571">
|
|
Equality and <codeph>IN</codeph> predicates in Impala queries are pushed to
|
|
Kudu and evaluated efficiently by the Kudu storage layer.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<p rev="">
|
|
<b>Security:</b>
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
Impala can take advantage of the S3 encrypted credential
|
|
store, to avoid exposing the secret key when accessing
|
|
data stored on S3.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<p rev="">
|
|
The <codeph>REFRESH</codeph> statement now updates information about HDFS block locations.
|
|
Therefore, you can perform a fast and efficient <codeph>REFRESH</codeph> after doing an HDFS
|
|
rebalancing operation instead of the more expensive <codeph>INVALIDATE METADATA</codeph> statement.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1654">
|
|
[<xref keyref="IMPALA-1654">IMPALA-1654</xref>]
|
|
Several kinds of DDL operations
|
|
can now work on a range of partitions. The partitions can be specified
|
|
using operators such as <codeph>&lt;</codeph>, <codeph>&gt;=</codeph>, and
|
|
<codeph>!=</codeph> rather than just an equality predicate applying to a single
|
|
partition.
|
|
This new feature extends the syntax of several clauses
|
|
of the <codeph>ALTER TABLE</codeph> statement
|
|
(<codeph>DROP PARTITION</codeph>, <codeph>SET [UN]CACHED</codeph>,
|
|
<codeph>SET FILEFORMAT | SERDEPROPERTIES | TBLPROPERTIES</codeph>),
|
|
the <codeph>SHOW FILES</codeph> statement, and the
|
|
<codeph>COMPUTE INCREMENTAL STATS</codeph> statement.
|
|
It does not apply to statements that are defined to only apply to a single
|
|
partition, such as <codeph>LOAD DATA</codeph>, <codeph>ALTER TABLE ... ADD PARTITION</codeph>,
|
|
<codeph>SET LOCATION</codeph>, and <codeph>INSERT</codeph> with a static
|
|
partitioning clause.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3973">
|
|
The <codeph>instr()</codeph> function has optional second and third arguments, representing
|
|
the character position at which to begin searching for the substring, and the Nth occurrence
|
|
of the substring to find.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3441 IMPALA-4387">
|
|
Improved error handling for malformed Avro data. In particular, incorrect
|
|
precision or scale for <codeph>DECIMAL</codeph> types is now handled.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Impala debug web UI:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p rev="IMPALA-1169">
|
|
In addition to <q>inflight</q> and <q>finished</q> queries, the web UI
|
|
now also includes a section for <q>queued</q> queries.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4048">
|
|
The <uicontrol>/sessions</uicontrol> tab now clarifies how many of the displayed
|
|
sessions are active, and lets you sort by <uicontrol>Expired</uicontrol> status
|
|
to distinguish active sessions from expired ones.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-4020">
|
|
Improved stability when DDL operations such as <codeph>CREATE DATABASE</codeph>
|
|
or <codeph>DROP DATABASE</codeph> are run in Hive at the same time as an Impala
|
|
<codeph>INVALIDATE METADATA</codeph> statement.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1616">
|
|
The <q>out of memory</q> error report was made more user-friendly, with additional
|
|
diagnostic information to help identify the spot where the memory limit was exceeded.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3983 IMPALA-3974">
|
|
Improved disk space usage for Java-based UDFs. Temporary copies of the associated JAR
|
|
files are removed when no longer needed, so that they do not accumulate across restarts
|
|
of the <cmdname>catalogd</cmdname> daemon and potentially cause an out-of-space condition.
|
|
These temporary files are also created in the directory specified by the <codeph>local_library_dir</codeph>
|
|
configuration setting, so that the storage for these temporary files can be independent
|
|
from any capacity limits on the <filepath>/tmp</filepath> filesystem.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
</concept>
|
|
|
|
<!-- All 2.7.x new features go under here -->
|
|
|
|
<concept rev="2.7.0" id="new_features_270">
|
|
|
|
<title>New Features in <keyword keyref="impala27_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<ul id="feature_list">
|
|
<li>
|
|
<p>
|
|
Performance improvements:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p rev="IMPALA-3206">
|
|
[<xref keyref="IMPALA-3206">IMPALA-3206</xref>]
|
|
Speedup for queries against <codeph>DECIMAL</codeph> columns in Avro tables.
|
|
The code that parses <codeph>DECIMAL</codeph> values from Avro now uses
|
|
native code generation.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3674">
|
|
[<xref keyref="IMPALA-3674">IMPALA-3674</xref>]
|
|
Improved efficiency in LLVM code generation can reduce codegen time, especially
|
|
for short queries.
|
|
</p>
|
|
</li>
|
|
<!-- Not actually a new feature, it's more a tip about when to expect remote reads and how to minimize them. To go somewhere in the performance / best practices / Parquet info.
|
|
<li>
|
|
<p rev="IMPALA-3885">
|
|
[<xref keyref="IMPALA-3885">IMPALA-3885</xref>]
|
|
Parquet files with multiple blocks can now be processed
|
|
without remote reads.
|
|
</p>
|
|
</li>
|
|
-->
|
|
<li>
|
|
<p rev="IMPALA-2979">
|
|
[<xref keyref="IMPALA-2979">IMPALA-2979</xref>]
|
|
Improvements to scheduling on worker nodes,
|
|
enabled by the <codeph>REPLICA_PREFERENCE</codeph> query option.
|
|
See <xref
|
|
href="impala_replica_preference.xml#replica_preference"/> for details.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li audience="hidden">
|
|
<p rev="IMPALA-3210"><!-- Patch didn't make it into <keyword keyref="impala27_full"/> -->
|
|
[<xref keyref="IMPALA-3210">IMPALA-3210</xref>]
|
|
The analytic functions <codeph>FIRST_VALUE()</codeph> and <codeph>LAST_VALUE()</codeph>
|
|
accept a new clause, <codeph>IGNORE NULLS</codeph>.
|
|
See <xref href="impala_analytic_functions.xml#first_value"/>
|
|
and <xref href="impala_analytic_functions.xml#last_value"/>
|
|
for details.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1683">
|
|
[<xref keyref="IMPALA-1683">IMPALA-1683</xref>]
|
|
The <codeph>REFRESH</codeph> statement can be applied to a single partition,
|
|
rather than the entire table. See <xref href="impala_refresh.xml#refresh"/>
|
|
and <xref href="impala_partitioning.xml#partition_refresh"/> for details.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Improvements to the Impala web user interface:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p rev="IMPALA-2767">
|
|
[<xref keyref="IMPALA-2767">IMPALA-2767</xref>]
|
|
You can now force a session to expire by clicking a link in the web UI,
|
|
on the <uicontrol>/sessions</uicontrol> tab.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3715">
|
|
[<xref keyref="IMPALA-3715">IMPALA-3715</xref>]
|
|
The <uicontrol>/memz</uicontrol> tab includes more information about
|
|
Impala memory usage.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3716">
|
|
[<xref keyref="IMPALA-3716">IMPALA-3716</xref>]
|
|
The <uicontrol>Details</uicontrol> page for a query now includes
|
|
a <uicontrol>Memory</uicontrol> tab.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3499">
|
|
[<xref keyref="IMPALA-3499">IMPALA-3499</xref>]
|
|
Scalability improvements to the catalog server. Impala handles internal communication
|
|
more efficiently for tables with large numbers of columns and partitions, where the
|
|
size of the metadata exceeds 2 GiB.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3677">
|
|
[<xref keyref="IMPALA-3677">IMPALA-3677</xref>]
|
|
You can send a <codeph>SIGUSR1</codeph> signal to any Impala-related daemon to write a
|
|
Breakpad minidump. For advanced troubleshooting, you can now produce a minidump
|
|
without triggering a crash. See <xref href="impala_breakpad.xml#breakpad"/> for
|
|
details about the Breakpad minidump feature.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3687">
|
|
[<xref keyref="IMPALA-3687">IMPALA-3687</xref>]
|
|
The schema reconciliation rules for Avro tables have changed slightly
|
|
for <codeph>CHAR</codeph> and <codeph>VARCHAR</codeph> columns. Now, if
|
|
the definition of such a column is changed in the Avro schema file,
|
|
the column retains its <codeph>CHAR</codeph> or <codeph>VARCHAR</codeph>
|
|
type as specified in the SQL definition, but the column name and comment
|
|
from the Avro schema file take precedence.
|
|
See <xref href="impala_avro.xml#avro_create_table"/> for details about
|
|
column definitions in Avro tables.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3575">
|
|
[<xref keyref="IMPALA-3575">IMPALA-3575</xref>]
|
|
Some network
|
|
operations now have additional timeout and retry settings. The extra
|
|
configuration helps to avoid failed queries due to transient network
|
|
problems, to avoid hangs when a sender or receiver fails in the
|
|
middle of a network transmission, and to make cancellation requests
|
|
more reliable despite network issues. </p>
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
</concept>
|
|
<!-- All 2.6.x new features go under here -->
|
|
|
|
<concept rev="2.6.0" id="new_features_260">
|
|
|
|
<title>New Features in <keyword keyref="impala26_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
Improvements to Impala support for the Amazon S3 filesystem:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p rev="IMPALA-1878">
|
|
Impala can now write to S3 tables through the <codeph>INSERT</codeph>
|
|
or <codeph>LOAD DATA</codeph> statements.
|
|
See <xref href="impala_s3.xml#s3"/> for general information about
|
|
using Impala with S3.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3452">
|
|
A new query option, <codeph>S3_SKIP_INSERT_STAGING</codeph>, lets you
|
|
trade off between fast <codeph>INSERT</codeph> performance and
|
|
slower <codeph>INSERT</codeph>s that are more consistent if a
|
|
problem occurs during the statement. The new behavior is enabled by default.
|
|
See <xref href="impala_s3_skip_insert_staging.xml#s3_skip_insert_staging"/> for details
|
|
about this option.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<p rev="">
|
|
Performance improvements for the runtime filtering feature:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p rev="IMPALA-3333">
|
|
The default for the <codeph>RUNTIME_FILTER_MODE</codeph>
|
|
query option is changed to <codeph>GLOBAL</codeph> (the highest setting).
|
|
See <xref href="impala_runtime_filter_mode.xml#runtime_filter_mode"/> for
|
|
details about this option.
|
|
</p>
|
|
</li>
|
|
<li rev="IMPALA-3007">
|
|
<p>
|
|
The <codeph>RUNTIME_BLOOM_FILTER_SIZE</codeph> setting is now only used
|
|
as a fallback if statistics are not available; otherwise, Impala
|
|
uses the statistics to estimate the appropriate size to use for each filter.
|
|
See <xref href="impala_runtime_bloom_filter_size.xml#runtime_bloom_filter_size"/> for
|
|
details about this option.
|
|
</p>
|
|
</li>
|
|
<li rev="IMPALA-3480">
|
|
<p>
|
|
New query options <codeph>RUNTIME_FILTER_MIN_SIZE</codeph> and
|
|
<codeph>RUNTIME_FILTER_MAX_SIZE</codeph> let you fine-tune
|
|
the sizes of the Bloom filter structures used for runtime filtering.
|
|
If the filter size derived from Impala internal estimates or from
|
|
the <codeph>RUNTIME_BLOOM_FILTER_SIZE</codeph> setting falls outside the size
|
|
range specified by these options, any too-small filter size is adjusted
|
|
to the minimum, and any too-large filter size is adjusted to the maximum.
|
|
See <xref href="impala_runtime_filter_min_size.xml#runtime_filter_min_size"/>
|
|
and <xref href="impala_runtime_filter_max_size.xml#runtime_filter_max_size"/>
|
|
for details about these options.
|
|
</p>
|
|
</li>
|
|
<li rev="IMPALA-2956">
|
|
<p>
|
|
Runtime filter propagation now applies to all the
|
|
operands of <codeph>UNION</codeph> and <codeph>UNION ALL</codeph>
|
|
operators.
|
|
</p>
|
|
</li>
|
|
<li rev="IMPALA-3077">
|
|
<p>
|
|
Runtime filters can now be produced during join queries even
|
|
when the join processing activates the spill-to-disk mechanism.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
See <xref href="impala_runtime_filtering.xml#runtime_filtering"/> for
|
|
general information about the runtime filtering feature.
|
|
</li>
|
|
<!-- Have to look closer at resource management / admission control to see if
|
|
there are any ripple effects from this default change. -->
|
|
<li>
|
|
<p rev="IMPALA-3199">
|
|
Admission control and dynamic resource pools are enabled by default.
|
|
See <xref href="impala_admission.xml#admission_control"/> for details
|
|
about admission control.
|
|
</p>
|
|
</li>
|
|
<!-- Below here are features that are pretty well taken care of already;
|
|
some of them didn't need much if any doc in the first place. -->
|
|
<li>
|
|
<p rev="IMPALA-3369">
|
|
You can now manually set column statistics in Impala,
|
|
using the <codeph>ALTER TABLE</codeph> statement with a
|
|
<codeph>SET COLUMN STATS</codeph> clause.
|
|
See <xref href="impala_perf_stats.xml#perf_column_stats_manual"/> for details.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3490 IMPALA-3581 IMPALA-2686">
|
|
Impala can now write lightweight <q>minidump</q> files, rather
|
|
than large core files, to save diagnostic information when
|
|
any of the Impala-related daemons crash. This feature uses the
|
|
open source <codeph>breakpad</codeph> framework.
|
|
See <xref href="impala_breakpad.xml#breakpad"/> for details.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
New query options improve interoperability with Parquet files:
|
|
<ul>
|
|
<li>
|
|
<p rev="IMPALA-2835">
|
|
The <codeph>PARQUET_FALLBACK_SCHEMA_RESOLUTION</codeph> query option
|
|
lets Impala locate columns within Parquet files based on
|
|
column name rather than ordinal position.
|
|
This enhancement improves interoperability with applications
|
|
that write Parquet files with a different order or subset of
|
|
columns than are used in the Impala table.
|
|
See <xref href="impala_parquet_fallback_schema_resolution.xml#parquet_fallback_schema_resolution"/>
|
|
for details.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2069">
|
|
The <codeph>PARQUET_ANNOTATE_STRINGS_UTF8</codeph> query option
|
|
makes Impala include the <codeph>UTF-8</codeph> annotation
|
|
metadata for <codeph>STRING</codeph>, <codeph>CHAR</codeph>,
|
|
and <codeph>VARCHAR</codeph> columns in Parquet files created
|
|
by <codeph>INSERT</codeph> or <codeph>CREATE TABLE AS SELECT</codeph>
|
|
statements.
|
|
See <xref href="impala_parquet_annotate_strings_utf8.xml#parquet_annotate_strings_utf8"/>
|
|
for details.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
See <xref href="impala_parquet.xml#parquet"/> for general information about working
|
|
with Parquet files.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Improvements to security and reduction in overhead for secure clusters:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p rev="IMPALA-1928">
|
|
Overall performance improvements for secure clusters.
|
|
(TPC-H queries on a secure cluster were benchmarked
|
|
at roughly 3x as fast as the previous release.)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2660">
|
|
Impala now recognizes the <codeph>auth_to_local</codeph> setting,
|
|
specified through the HDFS configuration setting
|
|
<codeph>hadoop.security.auth_to_local</codeph>.
|
|
This feature is disabled by default; to enable it,
|
|
specify <codeph>--load_auth_to_local_rules=true</codeph>
|
|
in the <cmdname>impalad</cmdname> configuration settings.
|
|
See <xref href="impala_kerberos.xml#auth_to_local"/> for details.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2599">
|
|
Timing improvements in the mechanism for the <cmdname>impalad</cmdname>
|
|
daemon to acquire Kerberos tickets. This feature spreads out the overhead
|
|
on the KDC during Impala startup, especially for large clusters.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3554">
|
|
For Kerberized clusters, the Catalog service now uses
|
|
              the Kerberos principal instead of the operating system user that runs
|
|
the <cmdname>catalogd</cmdname> daemon.
|
|
This eliminates the requirement to configure a <codeph>hadoop.user.group.static.mapping.overrides</codeph>
|
|
setting to put the OS user into the Sentry administrative group, on clusters where the principal
|
|
and the OS user name for this user are different.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3286">
|
|
Overall performance improvements for join queries, by using a prefetching mechanism
|
|
while building the in-memory hash table to evaluate join predicates.
|
|
See <xref href="impala_prefetch_mode.xml#prefetch_mode"/> for the query option
|
|
to control this optimization.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3397">
|
|
The <cmdname>impala-shell</cmdname> interpreter has a new command,
|
|
<codeph>SOURCE</codeph>, that lets you run a set of SQL statements
|
|
or other <cmdname>impala-shell</cmdname> commands stored in a file.
|
|
You can run additional <codeph>SOURCE</codeph> commands from inside
|
|
a file, to set up flexible sequences of statements for use cases
|
|
such as schema setup, ETL, or reporting.
|
|
See <xref href="impala_shell_commands.xml#shell_commands"/> for details
|
|
and <xref href="impala_shell_running_commands.xml#shell_running_commands"/>
|
|
for examples.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1772">
|
|
The <codeph>millisecond()</codeph> built-in function lets you extract
|
|
the fractional seconds part of a <codeph>TIMESTAMP</codeph> value.
|
|
See <xref href="impala_datetime_functions.xml#datetime_functions"/> for details.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3092">
|
|
If an Avro table is created without column definitions in the
|
|
<codeph>CREATE TABLE</codeph> statement, and columns are later
|
|
added through <codeph>ALTER TABLE</codeph>, the resulting
|
|
table is now queryable. Missing values from the newly added
|
|
columns now default to <codeph>NULL</codeph>.
|
|
See <xref href="impala_avro.xml#avro"/> for general details about
|
|
working with Avro files.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
The mechanism for interpreting <codeph>DECIMAL</codeph> literals is
|
|
improved, no longer going through an intermediate conversion step
|
|
to <codeph>DOUBLE</codeph>:
|
|
<ul>
|
|
<li>
|
|
<p rev="IMPALA-3163">
|
|
Casting a <codeph>DECIMAL</codeph> value to <codeph>TIMESTAMP</codeph>
|
|
                  no longer goes through <codeph>DOUBLE</codeph>, and so produces a more precise
|
|
value for the <codeph>TIMESTAMP</codeph> than formerly.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3439">
|
|
Certain function calls involving <codeph>DECIMAL</codeph> literals
|
|
now succeed, when formerly they failed due to lack of a function
|
|
signature with a <codeph>DOUBLE</codeph> argument.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="">
|
|
Faster runtime performance for <codeph>DECIMAL</codeph> constant
|
|
values, through improved native code generation for all combinations
|
|
of precision and scale.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
See <xref href="impala_decimal.xml#decimal"/> for details about the <codeph>DECIMAL</codeph> type.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3155">
|
|
Improved type accuracy for <codeph>CASE</codeph> return values.
|
|
If all <codeph>WHEN</codeph> clauses of the <codeph>CASE</codeph>
|
|
expression are of <codeph>CHAR</codeph> type, the final result
|
|
is also <codeph>CHAR</codeph> instead of being converted to
|
|
<codeph>STRING</codeph>.
|
|
See <xref href="impala_conditional_functions.xml#conditional_functions"/>
|
|
for details about the <codeph>CASE</codeph> function.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3232">
|
|
Uncorrelated queries using the <codeph>NOT EXISTS</codeph> operator
|
|
are now supported. Formerly, the <codeph>NOT EXISTS</codeph>
|
|
operator was only available for correlated subqueries.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2736">
|
|
Improved performance for reading Parquet files.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3375">
|
|
Improved performance for <term>top-N</term> queries, that is,
|
|
those including both <codeph>ORDER BY</codeph> and
|
|
<codeph>LIMIT</codeph> clauses.
|
|
</p>
|
|
</li>
|
|
<!-- JIRA still in open state as of 5.8 / 2.6, commenting out.
|
|
<li>
|
|
<p rev="IMPALA-3471">
|
|
A top-N query can now also activate the spill-to-disk mechanism if
|
|
a host runs low on memory while evaluating it. For example, using
|
|
large <codeph>LIMIT</codeph> and/or <codeph>OFFSET</codeph> clauses
|
|
adds some memory overhead that could cause spilling.
|
|
</p>
|
|
</li>
|
|
-->
|
|
<li>
|
|
<p rev="IMPALA-1740">
|
|
Impala optionally skips an arbitrary number of header lines from text input
|
|
files on HDFS based on the <codeph>skip.header.line.count</codeph> value
|
|
in the <codeph>TBLPROPERTIES</codeph> field of the table metadata.
|
|
See <xref href="impala_txtfile.xml#text_data_files"/> for details.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2336">
|
|
Trailing comments are now allowed in queries processed by
|
|
the <cmdname>impala-shell</cmdname> options <codeph>-q</codeph>
|
|
and <codeph>-f</codeph>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2844">
|
|
Impala can run <codeph>COUNT</codeph> queries for RCFile tables
|
|
that include complex type columns.
|
|
See <xref href="impala_complex_types.xml#complex_types"/> for
|
|
general information about working with complex types,
|
|
and <xref href="impala_array.xml#array"/>,
|
|
<xref href="impala_map.xml#map"/>, and <xref href="impala_struct.xml#struct"/>
|
|
for syntax details of each type.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
</concept>
|
|
|
|
<!-- All 2.5.x new features go under here -->
|
|
|
|
<concept rev="2.5.0" id="new_features_250">
|
|
|
|
<title>New Features in <keyword keyref="impala25_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<ul>
|
|
<li><!-- Spec: https://docs.google.com/document/d/1ambtYJ1t05iITCVIrN6N1A-e7PZBSetBPgjy8SLzJrA/edit#heading=h.vcftzwlpn845 -->
|
|
<p rev="IMPALA-2552 IMPALA-3054">
|
|
Dynamic partition pruning. When a query refers to a partition key column in a <codeph>WHERE</codeph>
|
|
          clause, and the exact set of column values is not known until the query is executed,
|
|
Impala evaluates the predicate and skips the I/O for entire partitions that are not needed.
|
|
For example, if a table was partitioned by year, Impala would apply this technique to a query
|
|
such as <codeph>SELECT c1 FROM partitioned_table WHERE year = (SELECT MAX(year) FROM other_table)</codeph>.
|
|
<ph audience="standalone">See <xref href="impala_partitioning.xml#dynamic_partition_pruning"/> for details.</ph>
|
|
</p>
|
|
<p>
|
|
The dynamic partition pruning optimization technique lets Impala avoid reading
|
|
data files from partitions that are not part of the result set, even when
|
|
that determination cannot be made in advance. This technique is especially valuable
|
|
when performing join queries involving partitioned tables. For example, if a join
|
|
query includes an <codeph>ON</codeph> clause and a <codeph>WHERE</codeph> clause
|
|
that refer to the same columns, the query can find the set of column values that
|
|
match the <codeph>WHERE</codeph> clause, and only scan the associated partitions
|
|
when evaluating the <codeph>ON</codeph> clause.
|
|
</p>
|
|
<p>
|
|
Dynamic partition pruning is controlled by the same settings as the runtime filtering feature.
|
|
By default, this feature is enabled at a medium level, because the maximum setting can use
|
|
slightly more memory for queries than in previous releases.
|
|
To fully enable this feature, set the query option <codeph>RUNTIME_FILTER_MODE=GLOBAL</codeph>.
|
|
</p>
|
|
</li>
|
|
<li><!-- Spec: https://docs.google.com/document/d/1ambtYJ1t05iITCVIrN6N1A-e7PZBSetBPgjy8SLzJrA/edit#heading=h.vcftzwlpn845 -->
|
|
<p rev="IMPALA-2419 IMPALA-3001 IMPALA-3008 IMPALA-3039 IMPALA-3046 IMPALA-3054">
|
|
Runtime filtering. This is a wide-ranging set of optimizations that are especially valuable for join queries.
|
|
Using the same technique as with dynamic partition pruning,
|
|
Impala uses the predicates from <codeph>WHERE</codeph> and <codeph>ON</codeph> clauses
|
|
          to determine the subset of column values from one of the joined tables that could possibly be part of the
|
|
result set. Impala sends a compact representation of the filter condition to the hosts in the cluster,
|
|
instead of the full set of values or the entire table.
|
|
<ph audience="PDF">See <xref href="impala_runtime_filtering.xml#runtime_filtering"/> for details.</ph>
|
|
</p>
|
|
<p>
|
|
By default, this feature is enabled at a medium level, because the maximum setting can use
|
|
slightly more memory for queries than in previous releases.
|
|
To fully enable this feature, set the query option <codeph>RUNTIME_FILTER_MODE=GLOBAL</codeph>.
|
|
<ph audience="PDF">See <xref href="impala_runtime_filter_mode.xml#runtime_filter_mode"/> for details.</ph>
|
|
</p>
|
|
<p>
|
|
This feature involves some new query options:
|
|
<xref audience="standalone" href="impala_runtime_filter_mode.xml">RUNTIME_FILTER_MODE</xref><codeph audience="integrated">RUNTIME_FILTER_MODE</codeph>,
|
|
<xref audience="standalone" href="impala_max_num_runtime_filters.xml">MAX_NUM_RUNTIME_FILTERS</xref><codeph audience="integrated">MAX_NUM_RUNTIME_FILTERS</codeph>,
|
|
<xref audience="standalone" href="impala_runtime_bloom_filter_size.xml">RUNTIME_BLOOM_FILTER_SIZE</xref><codeph audience="integrated">RUNTIME_BLOOM_FILTER_SIZE</codeph>,
|
|
<xref audience="standalone" href="impala_runtime_filter_wait_time_ms.xml">RUNTIME_FILTER_WAIT_TIME_MS</xref><codeph audience="integrated">RUNTIME_FILTER_WAIT_TIME_MS</codeph>,
|
|
and <xref audience="standalone" href="impala_disable_row_runtime_filtering.xml">DISABLE_ROW_RUNTIME_FILTERING</xref><codeph audience="integrated">DISABLE_ROW_RUNTIME_FILTERING</codeph>.
|
|
<ph audience="PDF">See
|
|
<xref href="impala_runtime_filter_mode.xml#runtime_filter_mode">RUNTIME_FILTER_MODE</xref>,
|
|
<xref href="impala_max_num_runtime_filters.xml#max_num_runtime_filters">MAX_NUM_RUNTIME_FILTERS</xref>,
|
|
<xref href="impala_runtime_bloom_filter_size.xml#runtime_bloom_filter_size">RUNTIME_BLOOM_FILTER_SIZE</xref>,
|
|
<xref href="impala_runtime_filter_wait_time_ms.xml#runtime_filter_wait_time_ms">RUNTIME_FILTER_WAIT_TIME_MS</xref>, and
|
|
<xref href="impala_disable_row_runtime_filtering.xml#disable_row_runtime_filtering">DISABLE_ROW_RUNTIME_FILTERING</xref>
|
|
for details.
|
|
</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2696">
|
|
More efficient use of the HDFS caching feature, to avoid
|
|
hotspots and bottlenecks that could occur if heavily used
|
|
cached data blocks were always processed by the same host.
|
|
By default, Impala now randomizes which host processes each cached
|
|
HDFS data block, when cached replicas are available on multiple hosts.
|
|
(Remember to use the <codeph>WITH REPLICATION</codeph> clause with the
|
|
<codeph>CREATE TABLE</codeph> or <codeph>ALTER TABLE</codeph> statement
|
|
when enabling HDFS caching for a table or partition, to cache the same
|
|
data blocks across multiple hosts.)
|
|
The new query option <codeph>SCHEDULE_RANDOM_REPLICA</codeph>
|
|
<!-- and <codeph>REPLICA_PREFERENCE</codeph> -->
|
|
lets you fine-tune the interaction with HDFS caching even more.
|
|
<ph audience="PDF">See <xref href="impala_perf_hdfs_caching.xml#hdfs_caching"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2641">
|
|
The <codeph>TRUNCATE TABLE</codeph> statement now accepts an <codeph>IF EXISTS</codeph>
|
|
clause, making <codeph>TRUNCATE TABLE</codeph> easier to use in setup or ETL scripts where the table might or
|
|
might not exist.
|
|
<ph audience="PDF">See <xref href="impala_truncate_table.xml#truncate_table"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2681 IMPALA-2688 IMPALA-2749">
|
|
Improved performance and reliability for the <codeph>DECIMAL</codeph> data type:
|
|
<ul>
|
|
<li>
|
|
<p rev="IMPALA-2681">
|
|
Using <codeph>DECIMAL</codeph> values in a <codeph>GROUP BY</codeph> clause now
|
|
triggers the native code generation optimization, speeding up queries that
|
|
group by values such as prices.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2688">
|
|
Checking for overflow in <codeph>DECIMAL</codeph>
|
|
multiplication is now substantially faster, making <codeph>DECIMAL</codeph>
|
|
a more practical data type in some use cases where formerly <codeph>DECIMAL</codeph>
|
|
was much slower than <codeph>FLOAT</codeph> or <codeph>DOUBLE</codeph>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2749">
|
|
Multiplying a mixture of <codeph>DECIMAL</codeph>
|
|
                and <codeph>FLOAT</codeph> or <codeph>DOUBLE</codeph> values now returns a
|
|
<codeph>DOUBLE</codeph> rather than <codeph>DECIMAL</codeph>. This change avoids
|
|
some cases where an intermediate value would underflow or overflow and become
|
|
<codeph>NULL</codeph> unexpectedly.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
<ph audience="PDF">See <xref href="impala_decimal.xml"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2382">
|
|
For UDFs written in Java, or Hive UDFs reused for Impala,
|
|
Impala now allows parameters and return values to be primitive types.
|
|
          Formerly, parameters and return values were required to be one of the <q>Writable</q>
|
|
object types.
|
|
<ph audience="PDF">See <xref href="impala_udf.xml#udfs_hive"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1588"><!-- This is from 2015, so perhaps it's really in an earlier release. -->
|
|
Performance improvements for HDFS I/O. Impala now caches HDFS file handles to avoid the
|
|
overhead of repeatedly opening the same file.
|
|
</p>
|
|
</li>
|
|
|
|
<!-- Kudu didn't make it into 2.5 / 5.7 release, so no DELETE or UPDATE statement. -->
|
|
<li>
|
|
<p><!-- Is there a JIRA for that one? Alex? -->
|
|
Performance improvements for queries involving nested complex types.
|
|
Certain basic query types, such as counting the elements of a complex column,
|
|
now use an optimized code path.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p rev="IMPALA-3044 IMPALA-2538 IMPALA-1168">
|
|
Improvements to the memory reservation mechanism for the Impala
|
|
admission control feature. You can specify more settings, such
|
|
as the timeout period and maximum aggregate memory used, for each
|
|
resource pool instead of globally for the Impala instance. The
|
|
default limit for concurrent queries (the <uicontrol>max requests</uicontrol>
|
|
setting) is now unlimited instead of 200.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p rev="IMPALA-1755">
|
|
Performance improvements related to code generation.
|
|
Even in queries where code generation is not performed
|
|
for some phases of execution (such as reading data from
|
|
Parquet tables), Impala can still use code generation in
|
|
other parts of the query, such as evaluating
|
|
functions in the <codeph>WHERE</codeph> clause.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1305">
|
|
Performance improvements for queries using aggregation functions
|
|
on high-cardinality columns.
|
|
Formerly, Impala could do unnecessary extra work to produce intermediate
|
|
results for operations such as <codeph>DISTINCT</codeph> or <codeph>GROUP BY</codeph>
|
|
on columns that were unique or had few duplicate values.
|
|
Now, Impala decides at run time whether it is more efficient to
|
|
do an initial aggregation phase and pass along a smaller set of intermediate data,
|
|
          or to pass raw intermediate data back to the next phase of query processing to be aggregated there.
|
|
This feature is known as <term>streaming pre-aggregation</term>.
|
|
In case of performance regression, this feature can be turned off
|
|
using the <codeph>DISABLE_STREAMING_PREAGGREGATIONS</codeph> query option.
|
|
<ph audience="PDF">See <xref href="impala_disable_streaming_preaggregations.xml#disable_streaming_preaggregations"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Spill-to-disk feature now always recommended. In earlier releases, the spill-to-disk feature
|
|
could be turned off using a pair of configuration settings,
|
|
<codeph>enable_partitioned_aggregation=false</codeph> and
|
|
<codeph>enable_partitioned_hash_join=false</codeph>.
|
|
The latest improvements in the spill-to-disk mechanism, and related features that
|
|
          interact with it, make this feature robust enough that disabling it is
|
|
no longer needed or supported. In particular, some new features in <keyword keyref="impala25_full"/>
|
|
and higher do not work when the spill-to-disk feature is disabled.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1067">
|
|
Improvements to scripting capability for the <cmdname>impala-shell</cmdname> command,
|
|
through user-specified substitution variables that can appear in statements processed
|
|
by <cmdname>impala-shell</cmdname>:
|
|
</p>
|
|
<ul>
|
|
<li rev="IMPALA-2179">
|
|
<p>
|
|
The <codeph>--var</codeph> command-line option lets you pass key-value pairs to
|
|
<cmdname>impala-shell</cmdname>. The shell can substitute the values
|
|
into queries before executing them, where the query text contains the notation
|
|
<codeph>${var:<varname>varname</varname>}</codeph>. For example, you might prepare a SQL file
|
|
containing a set of DDL statements and queries containing variables for
|
|
database and table names, and then pass the applicable names as part of the
|
|
<codeph>impala-shell -f <varname>filename</varname></codeph> command.
|
|
<ph audience="PDF">See <xref href="impala_shell_running_commands.xml#shell_running_commands"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li rev="IMPALA-2180">
|
|
<p>
|
|
The <codeph>SET</codeph> and <codeph>UNSET</codeph> commands within the
|
|
<cmdname>impala-shell</cmdname> interpreter now work with user-specified
|
|
substitution variables, as well as the built-in query options.
|
|
The two kinds of variables are divided in the <codeph>SET</codeph> output.
|
|
As with variables defined by the <codeph>--var</codeph> command-line option,
|
|
you refer to the user-specified substitution variables in queries by using
|
|
the notation <codeph>${var:<varname>varname</varname>}</codeph>
|
|
in the query text. Because the substitution variables are processed by
|
|
<cmdname>impala-shell</cmdname> instead of the <cmdname>impalad</cmdname>
|
|
backend, you cannot define your own substitution variables through the
|
|
<codeph>SET</codeph> statement in a JDBC or ODBC application.
|
|
<ph audience="PDF">See <xref href="impala_set.xml#set"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1599">
|
|
Performance improvements for query startup. Impala better parallelizes certain work
|
|
when coordinating plan distribution between <cmdname>impalad</cmdname> instances, which improves
|
|
startup time for queries involving tables with many partitions on large clusters,
|
|
or complicated queries with many plan fragments.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2560">
|
|
Performance and scalability improvements for tables with many partitions.
|
|
The memory requirements on the coordinator node are reduced, making it substantially
|
|
faster and less resource-intensive
|
|
to do joins involving several tables with thousands of partitions each.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-3095">
|
|
Whitelisting for access to internal APIs. For applications that need direct access
|
|
to Impala APIs, without going through the HiveServer2 or Beeswax interfaces, you can
|
|
specify a list of Kerberos users who are allowed to call those APIs. By default, the
|
|
<codeph>impala</codeph> and <codeph>hdfs</codeph> users are the only ones authorized
|
|
for this kind of access.
|
|
Any users not explicitly authorized through the <codeph>internal_principals_whitelist</codeph>
|
|
configuration setting are blocked from accessing the APIs. This setting applies to all the
|
|
Impala-related daemons, although currently it is primarily used for HDFS to control the
|
|
behavior of the catalog server.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="">
|
|
Improvements to Impala integration and usability for Hue. (The code changes
|
|
are actually on the Hue side.)
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p rev="">
|
|
The list of tables now refreshes dynamically.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1787">
|
|
Usability improvements for case-insensitive queries.
|
|
You can now use the operators <codeph>ILIKE</codeph> and <codeph>IREGEXP</codeph>
|
|
to perform case-insensitive wildcard matches or regular expression matches,
|
|
rather than explicitly converting column values with <codeph>UPPER</codeph>
|
|
or <codeph>LOWER</codeph>.
|
|
<ph audience="PDF">See <xref href="impala_operators.xml#ilike"/> and <xref href="impala_operators.xml#iregexp"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1480">
|
|
Performance and reliability improvements for DDL and insert operations on partitioned tables with a large
|
|
number of partitions. Impala only re-evaluates metadata for partitions that are affected by
|
|
a DDL operation, not all partitions in the table. While a DDL or insert statement is in progress,
|
|
other Impala statements that attempt to modify metadata for the same table wait until the first one
|
|
finishes.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2867">
|
|
Reliability improvements for the <codeph>LOAD DATA</codeph> statement.
|
|
Previously, this statement would fail if the source HDFS directory
|
|
contained any subdirectories at all. Now, the statement ignores
|
|
any hidden subdirectories, for example <filepath>_impala_insert_staging</filepath>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2147">
|
|
A new operator, <codeph>IS [NOT] DISTINCT FROM</codeph>, lets you compare values
|
|
and always get a <codeph>true</codeph> or <codeph>false</codeph> result,
|
|
even if one or both of the values are <codeph>NULL</codeph>.
|
|
The <codeph>IS NOT DISTINCT FROM</codeph> operator, or its equivalent
|
|
          <codeph>&lt;=&gt;</codeph> notation, improves the efficiency of join queries that
|
|
treat key values that are <codeph>NULL</codeph> in both tables as equal.
|
|
<ph audience="PDF">See <xref href="impala_operators.xml#is_distinct_from"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1934">
|
|
Security enhancements for the <cmdname>impala-shell</cmdname> command.
|
|
A new option, <codeph>--ldap_password_cmd</codeph>, lets you specify
|
|
a command to retrieve the LDAP password. The resulting password is
|
|
then used to authenticate the <cmdname>impala-shell</cmdname> command
|
|
with the LDAP server.
|
|
<ph audience="PDF">See <xref href="impala_shell_options.xml"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
The <codeph>CREATE TABLE AS SELECT</codeph> statement now accepts a
|
|
<codeph>PARTITIONED BY</codeph> clause, which lets you create a
|
|
partitioned table and insert data into it with a single statement.
|
|
<ph audience="PDF">See <xref href="impala_create_table.xml#create_table"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1748">
|
|
User-defined functions (UDFs and UDAFs) written in C++ now persist automatically
|
|
when the <cmdname>catalogd</cmdname> daemon is restarted. You no longer
|
|
have to run the <codeph>CREATE FUNCTION</codeph> statements again after a restart.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2843">
|
|
User-defined functions (UDFs) written in Java can now persist
|
|
when the <cmdname>catalogd</cmdname> daemon is restarted, and can be shared
|
|
transparently between Impala and Hive. You must do a one-time operation to recreate these
|
|
UDFs using new <codeph>CREATE FUNCTION</codeph> syntax, without a signature for arguments
|
|
or the return value. Afterwards, you no longer have to run the <codeph>CREATE FUNCTION</codeph>
|
|
statements again after a restart.
|
|
Although Impala does not have visibility into the UDFs that implement the
|
|
Hive built-in functions, user-created Hive UDFs are now automatically available
|
|
for calling through Impala.
|
|
<ph audience="PDF">See <xref href="impala_create_function.xml#create_function"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<!-- Listed as fixed in 2.6.0. Is this item inappropriate or did it actually come from a different JIRA? -->
|
|
<p rev="IMPALA-2728">
|
|
Reliability enhancements for memory management. Some aggregation and join queries
|
|
that formerly might have failed with an out-of-memory error due to memory contention,
|
|
now can succeed using the spill-to-disk mechanism.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<!-- Same blurb is under Incompatible Changes. Turn into a conref. -->
|
|
<p rev="IMPALA-2070">
|
|
The <codeph>SHOW DATABASES</codeph> statement now returns two columns rather than one.
|
|
The second column includes the associated comment string, if any, for each database.
|
|
Adjust any application code that examines the list of databases and assumes the
|
|
result set contains only a single column.
|
|
<ph audience="PDF">See <xref href="impala_show.xml#show_databases"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2499">
|
|
A new optimization speeds up aggregation operations that involve only the partition key
|
|
columns of partitioned tables. For example, a query such as <codeph>SELECT COUNT(DISTINCT k), MIN(k), MAX(k) FROM t1</codeph>
|
|
can avoid reading any data files if <codeph>T1</codeph> is a partitioned table and <codeph>K</codeph>
|
|
is one of the partition key columns. Because this technique can produce different results in cases
|
|
where HDFS files in a partition are manually deleted or are empty, you must enable the optimization
|
|
by setting the query option <codeph>OPTIMIZE_PARTITION_KEY_SCANS</codeph>.
|
|
<ph audience="PDF">See <xref href="impala_optimize_partition_key_scans.xml"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li audience="hidden"><!-- All the other undocumented query options are not really new features for this release, so hiding this whole bullet. -->
|
|
<p>
|
|
Other new query options:
|
|
</p>
|
|
<ul>
|
|
<li audience="hidden"><!-- Actually from a long way back, just never documented. Not sure if appropriate to keep internal-only or expose. -->
|
|
<codeph>DISABLE_OUTERMOST_TOPN</codeph>
|
|
</li>
|
|
<li audience="hidden"><!-- Actually from a long way back, just never documented. Not sure if appropriate to keep internal-only or expose. -->
|
|
<codeph>RM_INITIAL_MEM</codeph>
|
|
</li>
|
|
<li audience="hidden"><!-- Seems to be related to writing sequence files, a capability not externalized at this time. -->
|
|
<codeph>SEQ_COMPRESSION_MODE</codeph>
|
|
</li>
|
|
<li audience="hidden"><!-- Actually, was only used for working around one JIRA. Being deprecated now in Impala 2.3 via IMPALA-2963. -->
|
|
<codeph>DISABLE_CACHED_READS</codeph>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-2196">
|
|
The <codeph>DESCRIBE</codeph> statement can now display metadata about a database, using the
|
|
syntax <codeph>DESCRIBE DATABASE <varname>db_name</varname></codeph>.
|
|
<ph audience="PDF">See <xref href="impala_describe.xml#describe"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p rev="IMPALA-1477">
|
|
The <codeph>uuid()</codeph> built-in function generates an
|
|
alphanumeric value that you can use as a guaranteed unique identifier.
|
|
The uniqueness applies even across tables, for cases where an ascending
|
|
numeric sequence is not suitable.
|
|
<ph audience="PDF">See <xref href="impala_misc_functions.xml#misc_functions"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
</concept>
|
|
|
|
<!-- All 2.4.x new features go under here -->
|
|
|
|
<concept rev="2.4.0" id="new_features_240">
|
|
|
|
<title>New Features in <keyword keyref="impala24_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
Impala can be used on the DSSD D5 Storage Appliance.
|
|
From a user perspective, the Impala features are the same as in <keyword keyref="impala23_full"/>.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
</concept>
|
|
|
|
<!-- All 2.3.x subsections go under here -->
|
|
|
|
<!-- Actually for 2.3 / 5.5, let's get away from doing a separate subhead for each maintenance release,
|
|
because in the normal course of events there will be nothing to add here until 5.6. If something new
|
|
needs to get noted, just add a new bullet with wording to indicate which 5.5.x release it applies to. -->
|
|
|
|
<concept rev="2.3.0" id="new_features_230">
|
|
|
|
<title>New Features in <keyword keyref="impala23_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
The following are the major new features in Impala 2.3.x. This major release
|
|
contains improvements to SQL syntax (particularly new support for complex types), performance,
|
|
      manageability, and security.
|
|
</p>
|
|
|
|
<ul>
|
|
|
|
<li>
|
|
<p>
|
|
Complex data types: <codeph>STRUCT</codeph>, <codeph>ARRAY</codeph>, and <codeph>MAP</codeph>. These
|
|
types can encode multiple named fields, positional items, or key-value pairs within a single column.
|
|
You can combine these types to produce nested types with arbitrarily deep nesting,
|
|
such as an <codeph>ARRAY</codeph> of <codeph>STRUCT</codeph> values,
|
|
a <codeph>MAP</codeph> where each key-value pair is an <codeph>ARRAY</codeph> of other <codeph>MAP</codeph> values,
|
|
and so on. Currently, complex data types are only supported for the Parquet file format.
|
|
<ph audience="PDF">See <xref href="impala_complex_types.xml#complex_types"/> for usage details and <xref href="impala_array.xml#array"/>, <xref href="impala_struct.xml#struct"/>, and <xref href="impala_map.xml#map"/> for syntax.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="collevelauth">
|
|
<p>
|
|
Column-level authorization lets you define access to particular columns within a table,
|
|
rather than the entire table. This feature lets you reduce the reliance on creating views to
|
|
set up authorization schemes for subsets of information.
|
|
See <xref keyref="sg_hive_sql"/> for background details, and
|
|
<xref href="impala_grant.xml#grant"/> and <xref href="impala_revoke.xml#revoke"/> for Impala-specific syntax.
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="IMPALA-1139">
|
|
<p>
|
|
The <codeph>TRUNCATE TABLE</codeph> statement removes all the data from a table without removing the table itself.
|
|
<ph audience="PDF">See <xref href="impala_truncate_table.xml#truncate_table"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li id="IMPALA-2015">
|
|
<p>
|
|
Nested loop join queries. Some join queries that formerly required equality comparisons can now use
|
|
operators such as <codeph>&lt;</codeph> or <codeph>&gt;=</codeph>. This same join mechanism is used
|
|
internally to optimize queries that retrieve values from complex type columns.
|
|
<ph audience="PDF">See <xref href="impala_joins.xml#joins"/> for details about Impala join queries.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Reduced memory usage and improved performance and robustness for the spill-to-disk feature.
|
|
<ph audience="PDF">See <xref href="impala_scalability.xml#spill_to_disk"/> for details about this feature.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="IMPALA-1881">
|
|
<p>
|
|
Performance improvements for querying Parquet data files containing multiple row groups
|
|
and multiple data blocks:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p> For files written by Hive, SparkSQL, and other Parquet MR writers
|
|
and spanning multiple HDFS blocks, Impala now scans the extra
|
|
data blocks locally when possible, rather than using remote
|
|
reads. </p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Impala queries benefit from the improved alignment of row groups with HDFS blocks for Parquet
|
|
files written by Hive, MapReduce, and other components. (Impala itself never writes
|
|
multiblock Parquet files, so the alignment change does not apply to Parquet files produced by Impala.)
|
|
These Parquet writers now add padding to Parquet files that they write to align row groups with HDFS blocks.
|
|
The <codeph>parquet.writer.max-padding</codeph> setting specifies the maximum number of bytes, by default
|
|
8 megabytes, that can be added to the file between row groups to fill the gap at the end of one block
|
|
so that the next row group starts at the beginning of the next block.
|
|
If the gap is larger than this size, the writer attempts to fit another entire row group in the remaining space.
|
|
Include this setting in the <filepath>hive-site</filepath> configuration file to influence Parquet files written by Hive,
|
|
or the <filepath>hdfs-site</filepath> configuration file to influence Parquet files written by all non-Impala components.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
<p audience="PDF">
|
|
See <xref href="impala_parquet.xml#parquet"/> for instructions about using Parquet data files
|
|
with Impala.
|
|
</p>
|
|
</li>
|
|
|
|
<li id="IMPALA-1660">
|
|
<p>
|
|
Many new built-in scalar functions, for convenience and enhanced portability of SQL that uses common industry extensions.
|
|
</p>
|
|
|
|
<p rev="IMPALA-1771">
|
|
Math functions<ph audience="PDF"> (see <xref href="impala_math_functions.xml#math_functions"/> for details)</ph>:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<codeph>ATAN2</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>COSH</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>COT</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>DCEIL</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>DEXP</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>DFLOOR</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>DLOG10</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>DPOW</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>DROUND</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>DSQRT</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>DTRUNC</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>FACTORIAL</codeph>, and corresponding <codeph>!</codeph> operator
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>FPOW</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>RADIANS</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>RANDOM</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>SINH</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>TANH</codeph>
|
|
</li>
|
|
</ul>
|
|
|
|
<p>
|
|
String functions<ph audience="PDF"> (see <xref href="impala_string_functions.xml#string_functions"/> for details)</ph>:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<codeph>BTRIM</codeph>
|
|
</li>
|
|
<li>
|
|
<codeph>CHR</codeph>
|
|
</li>
|
|
<li>
|
|
<codeph>REGEXP_LIKE</codeph>
|
|
</li>
|
|
<li>
|
|
<codeph>SPLIT_PART</codeph>
|
|
</li>
|
|
</ul>
|
|
|
|
<p>
|
|
Date and time functions<ph audience="PDF"> (see <xref href="impala_datetime_functions.xml#datetime_functions"/> for details)</ph>:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<codeph>INT_MONTHS_BETWEEN</codeph>
|
|
</li>
|
|
<li>
|
|
<codeph>MONTHS_BETWEEN</codeph>
|
|
</li>
|
|
<li>
|
|
<codeph>TIMEOFDAY</codeph>
|
|
</li>
|
|
<li>
|
|
<codeph>TIMESTAMP_CMP</codeph>
|
|
</li>
|
|
</ul>
|
|
|
|
<p>
|
|
Bit manipulation functions<ph audience="PDF"> (see <xref href="impala_bit_functions.xml#bit_functions"/> for details)</ph>:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<codeph>BITAND</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>BITNOT</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>BITOR</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>BITXOR</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>COUNTSET</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>GETBIT</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>ROTATELEFT</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>ROTATERIGHT</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>SETBIT</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>SHIFTLEFT</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>SHIFTRIGHT</codeph>
|
|
</li>
|
|
</ul>
|
|
<p>
|
|
Type conversion functions<ph audience="PDF"> (see <xref href="impala_conversion_functions.xml#conversion_functions"/> for details)</ph>:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<codeph>TYPEOF</codeph>
|
|
</li>
|
|
</ul>
|
|
<p>
|
|
The <codeph>effective_user()</codeph> function<ph audience="PDF"> (see <xref href="impala_misc_functions.xml#misc_functions"/> for details)</ph>.
|
|
</p>
|
|
</li>
|
|
|
|
<li id="IMPALA-2081">
|
|
<p>
|
|
New built-in analytic functions: <codeph>PERCENT_RANK</codeph>, <codeph>NTILE</codeph>,
|
|
<codeph>CUME_DIST</codeph>.
|
|
<ph audience="PDF">See <xref href="impala_analytic_functions.xml#analytic_functions"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li id="IMPALA-595">
|
|
<p>
|
|
The <codeph>DROP DATABASE</codeph> statement now works for a non-empty database.
|
|
When you specify the optional <codeph>CASCADE</codeph> clause, any tables in the
|
|
database are dropped before the database itself is removed.
|
|
<ph audience="PDF">See <xref href="impala_drop_database.xml#drop_database"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <codeph>DROP TABLE</codeph> and <codeph>ALTER TABLE DROP PARTITION</codeph> statements have a new optional keyword, <codeph>PURGE</codeph>.
|
|
This keyword causes Impala to immediately remove the relevant HDFS data files rather than sending them to the HDFS trashcan.
|
|
This feature can help to avoid out-of-space errors on storage devices, and to avoid files being left behind in case of
|
|
a problem with the HDFS trashcan, such as the trashcan not being configured or being in a different HDFS encryption zone
|
|
than the data files.
|
|
<ph audience="PDF">See <xref href="impala_drop_table.xml#drop_table"/> and <xref href="impala_alter_table.xml#alter_table"/> for syntax.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li id="IMPALA-80">
|
|
<p>
|
|
The <cmdname>impala-shell</cmdname> command has a new feature for live progress reporting. This feature
|
|
is enabled through the <codeph>--live_progress</codeph> and <codeph>--live_summary</codeph>
|
|
command-line options, or during a session through the <codeph>LIVE_SUMMARY</codeph> and
|
|
<codeph>LIVE_PROGRESS</codeph> query options.
|
|
<ph audience="PDF">See <xref href="impala_live_progress.xml#live_progress"/> and <xref href="impala_live_summary.xml#live_summary"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <cmdname>impala-shell</cmdname> command also now displays a random <q>tip of the day</q> when it starts.
|
|
</p>
|
|
</li>
|
|
|
|
<li id="IMPALA-1413">
|
|
<p>
|
|
The <cmdname>impala-shell</cmdname> option <codeph>-f</codeph> now recognizes a special filename
|
|
<codeph>-</codeph> to accept input from stdin.
|
|
<ph audience="PDF">See <xref href="impala_shell_options.xml#shell_options"/> for details about the options for running <cmdname>impala-shell</cmdname> in non-interactive mode.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li id="IMPALA-1963">
|
|
<p>
|
|
Format strings for the <codeph>unix_timestamp()</codeph> function can now include numeric timezone offsets.
|
|
<ph audience="PDF">See <xref href="impala_datetime_functions.xml#datetime_functions"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Impala can now run a specified command to obtain the password to decrypt a private-key PEM file,
|
|
rather than having the private-key file be unencrypted on disk.
|
|
<ph audience="PDF">See <xref href="impala_ssl.xml#ssl"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li id="IMPALA-859">
|
|
<p>
|
|
Impala components now can use SSL for more of their internal communication. SSL is used for
|
|
communication between all three Impala-related daemons when the configuration option
|
|
<codeph>ssl_server_certificate</codeph> is enabled. SSL is used for communication with client
|
|
applications when the configuration option <codeph>ssl_client_ca_certificate</codeph> is enabled.
|
|
<ph audience="PDF">See <xref href="impala_ssl.xml#ssl"/> for details.</ph>
|
|
</p>
|
|
<p>
|
|
Currently, you can only use one of server-to-server TLS/SSL encryption or Kerberos authentication.
|
|
This limitation is tracked by the issue
|
|
<xref keyref="IMPALA-2598">IMPALA-2598</xref>.
|
|
</p>
|
|
</li>
|
|
|
|
<li id="IMPALA-1829">
|
|
<p>
|
|
Improved flexibility for intermediate data types in user-defined aggregate functions (UDAFs).
|
|
<ph audience="PDF">See <xref href="impala_udf.xml#udafs"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
<p>
|
|
In <keyword keyref="impala232"/>, the bug fix for <xref keyref="IMPALA-2598">IMPALA-2598</xref>
|
|
removes the restriction on using both Kerberos and SSL for internal communication between Impala components.
|
|
</p>
|
|
|
|
<!-- End of new feature list for 2.3 / 5.5. -->
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<!-- All 2.2.x subsections go under here -->
|
|
|
|
<concept rev="2.2.0" id="new_features_220">
|
|
|
|
<title>New Features in <keyword keyref="impala22_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
The following are the major new features in <keyword keyref="impala22_full"/>. This release
|
|
contains improvements to performance, manageability, security, and SQL syntax.
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
Several improvements to date and time features enable higher interoperability with Hive and other
|
|
database systems, provide more flexibility for handling time zones, and future-proof the handling of
|
|
<codeph>TIMESTAMP</codeph> values:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
The <codeph>WITH REPLICATION</codeph> clause for the <codeph>CREATE TABLE</codeph> and
|
|
<codeph>ALTER TABLE</codeph> statements lets you control the replication factor for
|
|
HDFS caching for a specific table or partition. By default, each cached block is
|
|
only present on a single host, which can lead to CPU contention if the same host
|
|
processes each cached block. Increasing the replication factor lets Impala choose
|
|
different hosts to process different cached blocks, to better distribute the CPU load.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Startup flags for the <cmdname>impalad</cmdname> daemon enable a higher level of compatibility with
|
|
<codeph>TIMESTAMP</codeph> values written by Hive, and more flexibility for working with date and
|
|
time data using the local time zone instead of UTC. To enable these features, set the
|
|
<cmdname>impalad</cmdname> startup flags
|
|
<codeph>-use_local_tz_for_unix_timestamp_conversions=true</codeph> and
|
|
<codeph>-convert_legacy_hive_parquet_utc_timestamps=true</codeph>.
|
|
</p>
|
|
|
|
<p>
|
|
The <codeph>-use_local_tz_for_unix_timestamp_conversions</codeph> setting controls how the
|
|
<codeph>unix_timestamp()</codeph>, <codeph>from_unixtime()</codeph>, and <codeph>now()</codeph>
|
|
functions handle time zones. By default (when this setting is turned off), Impala considers all
|
|
<codeph>TIMESTAMP</codeph> values to be in the UTC time zone when converting to or from Unix time
|
|
values. When this setting is enabled, Impala treats <codeph>TIMESTAMP</codeph> values passed to or
|
|
returned from these functions as being in the local time zone. When this setting is enabled, take
|
|
particular care that all hosts in the cluster have the same timezone settings, to avoid
|
|
inconsistent results depending on which host reads or writes <codeph>TIMESTAMP</codeph> data.
|
|
</p>
|
|
|
|
<p>
|
|
The <codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> setting causes Impala to convert
|
|
<codeph>TIMESTAMP</codeph> values to the local time zone when it reads them from Parquet files
|
|
written by Hive. This setting only applies to data using the Parquet file format, where Impala can
|
|
use metadata in the files to reliably determine that the files were written by Hive. If in the
|
|
future Hive changes the way it writes <codeph>TIMESTAMP</codeph> data in Parquet, Impala will
|
|
automatically handle that new <codeph>TIMESTAMP</codeph> encoding.
|
|
</p>
|
|
|
|
<p>
|
|
See <xref href="impala_timestamp.xml#timestamp"/> for details about time zone handling and the
|
|
configuration options for Impala / Hive compatibility with Parquet format.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p conref="../shared/impala_common.xml#common/y2k38" />
|
|
|
|
<p>
|
|
See <xref href="impala_datetime_functions.xml#datetime_functions"/> for the current function
|
|
signatures.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <codeph>SHOW FILES</codeph> statement lets you view the names and sizes of the files that make up
|
|
an entire table or a specific partition. See <xref href="impala_show.xml#show_files"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Impala can now run queries against Parquet data containing columns with complex or nested types, as
|
|
long as the query only refers to columns with scalar types.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Performance improvements for queries that include <codeph>IN()</codeph> operators and involve
|
|
partitioned tables.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<!-- Same text for this item in impala_fixed_issues.xml. Could turn into a conref. -->
|
|
<p>
|
|
The new <codeph>-max_log_files</codeph> configuration option specifies how many log files to keep at
|
|
each severity level. The default value is 10, meaning that Impala preserves the latest 10 log files for
|
|
each severity level (<codeph>INFO</codeph>, <codeph>WARNING</codeph>, and <codeph>ERROR</codeph>) for
|
|
each Impala-related daemon (<cmdname>impalad</cmdname>, <cmdname>statestored</cmdname>, and
|
|
<cmdname>catalogd</cmdname>). Impala checks to see if any old logs need to be removed based on the
|
|
interval specified in the <codeph>logbufsecs</codeph> setting, every 5 seconds by default. See
|
|
<xref href="impala_logging.xml#logs_rotate"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Redaction of sensitive data from Impala log files. This feature protects details such as credit card
|
|
numbers or tax IDs from administrators who see the text of SQL statements in the course of monitoring
|
|
and troubleshooting a Hadoop cluster. See <xref href="impala_logging.xml#redaction"/> for background
|
|
information for Impala users, and <xref keyref="sg_redaction"/> for usage details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Lineage information is available for data created or queried by Impala. This feature lets you track who
|
|
has accessed data through Impala SQL statements, down to the level of specific columns, and how data
|
|
has been propagated between tables. See <xref href="impala_lineage.xml#lineage"/> for background
|
|
information for Impala users, and <xref keyref="datamgmt_impala_lineage_log"/> for usage details and
|
|
how to interpret the lineage information.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Impala tables and partitions can now be located on the Amazon Simple Storage Service (S3) filesystem,
|
|
for convenience in cases where data is already located in S3 and you prefer to query it in-place.
|
|
Queries might have lower performance than when the data files reside on HDFS, because Impala uses some
|
|
HDFS-specific optimizations. Impala can query data in S3, but cannot write to S3. Therefore, statements
|
|
such as <codeph>INSERT</codeph> and <codeph>LOAD DATA</codeph> are not available when the destination
|
|
table or partition is in S3. See <xref href="impala_s3.xml#s3"/> for details.
|
|
</p>
|
|
|
|
<note conref="../shared/impala_common.xml#common/s3_caveat" />
|
|
</li>
|
|
|
|
<li>
|
|
<!-- Only want the link out of the release notes to appear for HTML
|
|
(N.B. audience="PDF" means hide from PDF), and only in the HTML for the
|
|
integrated build where the topic is available for link resolution. -->
|
|
<p>
|
|
Improved support for HDFS encryption. The <codeph>LOAD DATA</codeph> statement now works when the
|
|
source directory and destination table are in different encryption zones. See
|
|
<xref keyref="cdh_sg_component_kms"/> for details about using HDFS encryption with
|
|
Impala.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Additional arithmetic function <codeph>mod()</codeph>. See
|
|
<xref href="impala_math_functions.xml#math_functions"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Flexibility to interpret <codeph>TIMESTAMP</codeph> values using the UTC time zone (the traditional
|
|
Impala behavior) or using the local time zone (for compatibility with <codeph>TIMESTAMP</codeph> values
|
|
produced by Hive).
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Enhanced support for ETL using tools such as Flume. Impala ignores temporary files typically produced
|
|
by these tools (filenames with suffixes <codeph>.copying</codeph> and <codeph>.tmp</codeph>).
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The CPU requirement for Impala, which had become more restrictive in Impala 2.0.x and 2.1.x, has now
|
|
been relaxed.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/cpu_prereq" />
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Enhanced support for <codeph>CHAR</codeph> and <codeph>VARCHAR</codeph> types in the <codeph>COMPUTE
|
|
STATS</codeph> statement.
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="">
|
|
<p>
|
|
The amount of memory required during setup for <q>spill to disk</q> operations is greatly reduced. This
|
|
enhancement reduces the chance of a memory-intensive join or aggregation query failing with an
|
|
out-of-memory error.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Several new conditional functions provide enhanced compatibility when porting code that uses industry
|
|
extensions. The new functions are: <codeph>isfalse()</codeph>, <codeph>isnotfalse()</codeph>,
|
|
<codeph>isnottrue()</codeph>, <codeph>istrue()</codeph>, <codeph>nonnullvalue()</codeph>, and
|
|
<codeph>nullvalue()</codeph>. See <xref href="impala_conditional_functions.xml#conditional_functions"/>
|
|
for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The Impala debug web UI now can display a visual representation of the query plan. On the
|
|
<uicontrol>/queries</uicontrol> tab, select <uicontrol>Details</uicontrol> for a particular query. The
|
|
<uicontrol>Details</uicontrol> page includes a <uicontrol>Plan</uicontrol> tab with a plan diagram that
|
|
you can zoom in or out (using scroll gestures through mouse wheel or trackpad).
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
|
|
<!-- End of new feature list for 5.4. -->
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<!-- All 2.1.x subsections go under here -->
|
|
|
|
<concept rev="2.1.0" id="new_features_210">
|
|
|
|
<title>New Features in <keyword keyref="impala21_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
This release contains the following enhancements to query performance and system scalability:
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
Impala can now collect statistics for individual partitions in a partitioned table, rather than
|
|
processing the entire table for each <codeph>COMPUTE STATS</codeph> statement. This feature is known as
|
|
incremental statistics, and is controlled by the <codeph>COMPUTE INCREMENTAL STATS</codeph> syntax.
|
|
(You can still use the original <codeph>COMPUTE STATS</codeph> statement for nonpartitioned tables or
|
|
partitioned tables that are unchanging or whose contents are entirely replaced all at once.) See
|
|
<xref href="impala_compute_stats.xml#compute_stats"/> and
|
|
<xref href="impala_perf_stats.xml#perf_stats"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Optimization for small queries lets Impala process queries that process very few rows without the
|
|
unnecessary overhead of parallelizing and generating native code. Reducing this overhead lets Impala
|
|
clear small queries quickly, keeping YARN resources and admission control slots available for
|
|
data-intensive queries. The number of rows considered to be a <q>small</q> query is controlled by the
|
|
<codeph>EXEC_SINGLE_NODE_ROWS_THRESHOLD</codeph> query option. See
|
|
<xref href="impala_exec_single_node_rows_threshold.xml#exec_single_node_rows_threshold"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
An enhancement to the statestore component lets it transmit heartbeat information independently of
|
|
broadcasting metadata updates. This optimization improves reliability of health checking on large
|
|
clusters with many tables and partitions.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The memory requirement for querying gzip-compressed text is reduced. Now Impala decompresses the data
|
|
as it is read, rather than reading the entire gzipped file and decompressing it in memory.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<!-- All 2.0.x subsections go under here -->
|
|
|
|
<concept rev="2.0.0" id="new_features_200">
|
|
|
|
<title>New Features in <keyword keyref="impala20_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
The following are the major new features in <keyword keyref="impala20_full"/>. This major release
|
|
contains improvements to performance, scalability, security, and SQL syntax.
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
Queries with joins or aggregation functions involving high volumes of data can now use temporary work
|
|
areas on disk, reducing the chance of failure due to out-of-memory errors. When the required memory for
|
|
the intermediate result set exceeds the amount available on a particular node, the query automatically
|
|
uses a temporary work area on disk. This <q>spill to disk</q> mechanism is similar to the <codeph>ORDER
|
|
BY</codeph> improvement from Impala 1.4. For details, see
|
|
<xref href="impala_scalability.xml#spill_to_disk"/>.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Subquery enhancements:
|
|
<ul>
|
|
<li>
|
|
Subqueries are now allowed in the <codeph>WHERE</codeph> clause, for example with the
|
|
<codeph>IN</codeph> operator.
|
|
</li>
|
|
|
|
<li>
|
|
The <codeph>EXISTS</codeph> and <codeph>NOT EXISTS</codeph> operators are available. They are
|
|
always used in conjunction with subqueries.
|
|
</li>
|
|
|
|
<li>
|
|
The <codeph>IN</codeph> and <codeph>NOT IN</codeph> queries can now operate on the result set from
|
|
a subquery, not just a hardcoded list of values.
|
|
</li>
|
|
|
|
<li>
|
|
Uncorrelated subqueries let you compare against one or more values for equality,
|
|
<codeph>IN</codeph>, and <codeph>EXISTS</codeph> comparisons. For example, you might use
|
|
<codeph>WHERE</codeph> clauses such as <codeph>WHERE <varname>column</varname> = (SELECT
|
|
MAX(<varname>some_other_column</varname>) FROM <varname>table</varname>)</codeph> or <codeph>WHERE
|
|
<varname>column</varname> IN (SELECT <varname>some_other_column</varname> FROM
|
|
<varname>table</varname> WHERE <varname>conditions</varname>)</codeph>.
|
|
</li>
|
|
|
|
<li>
|
|
Correlated subqueries let you cross-reference values from the outer query block and the subquery.
|
|
</li>
|
|
|
|
<li>
|
|
Scalar subqueries let you substitute the result of single-value aggregate functions such as
|
|
<codeph>MAX()</codeph>, <codeph>MIN()</codeph>, <codeph>COUNT()</codeph>, or
|
|
<codeph>AVG()</codeph>, where you would normally use a numeric value in a <codeph>WHERE</codeph>
|
|
clause.
|
|
</li>
|
|
</ul>
|
|
</p>
|
|
|
|
<p>
|
|
For details about subqueries, see <xref href="impala_subqueries.xml#subqueries"/>. For information about
|
|
new and improved operators, see <xref href="impala_operators.xml#exists"/> and
|
|
<xref href="impala_operators.xml#in"/>.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Analytic functions such as <codeph>RANK()</codeph>, <codeph>LAG()</codeph>, <codeph>LEAD()</codeph>,
|
|
and <codeph>FIRST_VALUE()</codeph> let you analyze sequences of rows with flexible ordering and
|
|
grouping. Existing aggregate functions such as <codeph>MAX()</codeph>, <codeph>SUM()</codeph>, and
|
|
<codeph>COUNT()</codeph> can also be used in an analytic context. See
|
|
<xref href="impala_analytic_functions.xml#analytic_functions"/> for details. See
|
|
<xref href="impala_aggregate_functions.xml#aggregate_functions"/> for enhancements to existing
|
|
aggregate functions.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
New data types provide greater compatibility with source code from traditional database systems:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<codeph>VARCHAR</codeph> is like the <codeph>STRING</codeph> data type, but with a maximum length.
|
|
See <xref href="impala_varchar.xml#varchar"/> for details.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>CHAR</codeph> is like the <codeph>STRING</codeph> data type, but with a precise length. Short
|
|
values are padded with spaces on the right. See <xref href="impala_char.xml#char"/> for details.
|
|
</li>
|
|
|
|
<li audience="hidden">
|
|
<!-- This feature will be undocumented in Impala 2.0, probably ready for prime time in 2.1. -->
|
|
<codeph>DATE</codeph>. See <xref href="impala_date.xml#date"/> for details.
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Security enhancements:
|
|
<ul>
|
|
<li>
|
|
Formerly, Impala was restricted to using either Kerberos or LDAP / Active Directory authentication
|
|
within a cluster. Now, Impala can freely accept either kind of authentication request, allowing you
|
|
to set up some hosts with Kerberos authentication and others with LDAP or Active Directory. See
|
|
<xref href="impala_mixed_security.xml#mixed_security"/> for details.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>GRANT</codeph> statement. See <xref href="impala_grant.xml#grant"/> for details.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>REVOKE</codeph> statement. See <xref href="impala_revoke.xml#revoke"/> for details.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>CREATE ROLE</codeph> statement. See <xref href="impala_create_role.xml#create_role"/> for
|
|
details.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>DROP ROLE</codeph> statement. See <xref href="impala_drop_role.xml#drop_role"/> for
|
|
details.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>SHOW ROLES</codeph> and <codeph>SHOW ROLE GRANT</codeph> statements. See
|
|
<xref href="impala_show.xml#show"/> for details.
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
To complement the HDFS encryption feature, a new Impala configuration option,
|
|
<codeph>--disk_spill_encryption</codeph>, secures sensitive data from being observed or tampered
|
|
with when temporarily stored on disk.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</p>
|
|
|
|
<p>
|
|
The new security-related SQL statements work along with the Sentry authorization framework. See
|
|
<xref keyref="authorization"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Impala can now read text files compressed by gzip, bzip2, or Snappy. These files do not
|
|
require any special table settings to work in an Impala text table. Impala recognizes the compression
|
|
type automatically based on file extensions of <codeph>.gz</codeph>, <codeph>.bz2</codeph>, and
|
|
<codeph>.snappy</codeph> respectively. These types of compressed text files are intended for
|
|
convenience with existing ETL pipelines. Their non-splittable nature means they are not optimal for
|
|
high-performance parallel queries. See <xref href="impala_txtfile.xml#gzip"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Query hints can now use comment notation, <codeph>/* +<varname>hint_name</varname> */</codeph> or
|
|
<codeph>-- +<varname>hint_name</varname></codeph>, at the same places in the query where the hints
|
|
enclosed by <codeph>[ ]</codeph> are recognized. This enhancement makes it easier to reuse Impala
|
|
queries on other database systems. See <xref href="impala_hints.xml#hints"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
A new query option, <codeph>QUERY_TIMEOUT_S</codeph>, lets you specify a timeout period in seconds for
|
|
individual queries.
|
|
</p>
|
|
|
|
<p>
|
|
The working of the <codeph>--idle_query_timeout</codeph> configuration option is extended. If no
|
|
<codeph>QUERY_TIMEOUT_S</codeph> query option is in effect, <codeph>--idle_query_timeout</codeph> works
|
|
the same as before, setting the timeout interval. When the <codeph>QUERY_TIMEOUT_S</codeph> query option
|
|
is specified, its maximum value is capped by the value of the <codeph>--idle_query_timeout</codeph>
|
|
option.
|
|
</p>
|
|
|
|
<p>
|
|
That is, the system administrator sets the default and maximum timeout through the
|
|
<codeph>--idle_query_timeout</codeph> startup option, and then individual users or applications can set
|
|
a lower timeout value if desired through the <codeph>QUERY_TIMEOUT_S</codeph> query option. See
|
|
<xref href="impala_timeouts.xml#timeouts"/> and
|
|
<xref href="impala_query_timeout_s.xml#query_timeout_s"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
New functions <codeph>VAR_SAMP()</codeph> and <codeph>VAR_POP()</codeph> are aliases for the existing
|
|
<codeph>VARIANCE_SAMP()</codeph> and <codeph>VARIANCE_POP()</codeph> functions.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
A new date and time function, <codeph>DATE_PART()</codeph>, provides similar functionality to
|
|
<codeph>EXTRACT()</codeph>. You can also call the <codeph>EXTRACT()</codeph> function using the SQL-99
|
|
syntax, <codeph>EXTRACT(<varname>unit</varname> FROM <varname>timestamp</varname>)</codeph>. These
|
|
enhancements simplify the porting process for date-related code from other systems. See
|
|
<xref href="impala_datetime_functions.xml#datetime_functions"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
New approximation features provide a fast way to get results when absolute precision is not required:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
The <codeph>APPX_COUNT_DISTINCT</codeph> query option lets Impala rewrite
|
|
<codeph>COUNT(DISTINCT)</codeph> calls to use <codeph>NDV()</codeph> instead, which speeds up the
|
|
operation and allows multiple <codeph>COUNT(DISTINCT)</codeph> operations in a single query. See
|
|
<xref href="impala_appx_count_distinct.xml#appx_count_distinct"/> for details.
|
|
</li>
|
|
</ul>
|
|
The <codeph>APPX_MEDIAN()</codeph> aggregate function produces an estimate for the median value of a
|
|
column by using sampling. See <xref href="impala_appx_median.xml#appx_median"/> for details.
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Impala now supports a <codeph>DECODE()</codeph> function. This function works as a shorthand for a
|
|
<codeph>CASE()</codeph> expression, and improves compatibility with SQL code containing vendor
|
|
extensions. See <xref href="impala_conditional_functions.xml#conditional_functions"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <codeph>STDDEV()</codeph>, <codeph>STDDEV_POP()</codeph>, <codeph>STDDEV_SAMP()</codeph>,
|
|
<codeph>VARIANCE()</codeph>, <codeph>VARIANCE_POP()</codeph>, <codeph>VARIANCE_SAMP()</codeph>, and
|
|
<codeph>NDV()</codeph> aggregate functions now all return <codeph>DOUBLE</codeph> results rather than
|
|
<codeph>STRING</codeph>. Formerly, you were required to <codeph>CAST()</codeph> the result to a numeric
|
|
type before using it in arithmetic operations.
|
|
</p>
|
|
</li>
|
|
|
|
<li id="parquet_block_size">
|
|
<p>
|
|
The default settings for Parquet block size, and the associated <codeph>PARQUET_FILE_SIZE</codeph>
|
|
query option, are changed. Now, Impala writes Parquet files with a size of 256 MB and an HDFS block
|
|
size of 256 MB. Previously, Impala attempted to write Parquet files with a size of 1 GB and an HDFS
|
|
block size of 1 GB. In practice, Impala used a conservative estimate of the disk space needed for each
|
|
Parquet block, leading to files that were typically 512 MB anyway. Thus, this change will make the file
|
|
size more accurate if you specify a value for the <codeph>PARQUET_FILE_SIZE</codeph> query option. It
|
|
also reduces the amount of memory reserved during <codeph>INSERT</codeph> into Parquet tables,
|
|
potentially avoiding out-of-memory errors and improving scalability when inserting data into Parquet
|
|
tables.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Anti-joins are now supported, expressed using the <codeph>LEFT ANTI JOIN</codeph> and <codeph>RIGHT
|
|
ANTI JOIN</codeph> clauses.
|
|
<!-- Maybe RIGHT SEMI JOIN is new too? -->
|
|
<!-- Make following statement true in the context of RIGHT ANTI JOIN. -->
|
|
These clauses return results from one table that have no match in the other table. You might use this
|
|
type of join in the same sorts of use cases as the <codeph>NOT EXISTS</codeph> and <codeph>NOT
|
|
IN</codeph> operators. See <xref href="impala_joins.xml#joins"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li audience="hidden">
|
|
<!-- This feature will be undocumented in Impala 2.0, probably ready for prime time in 2.1. -->
|
|
<p>
|
|
Improved file format support. Impala can now write to Avro, compressed text, SequenceFile, and RCFile
|
|
tables using the <codeph>INSERT</codeph> or <codeph>CREATE TABLE AS SELECT</codeph> statements. See
|
|
<xref href="impala_file_formats.xml#file_formats"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <codeph>SET</codeph> command in <cmdname>impala-shell</cmdname> has been promoted to a real SQL
|
|
statement. You can now set query options such as <codeph>PARQUET_FILE_SIZE</codeph>,
|
|
<codeph>MEM_LIMIT</codeph>, and <codeph>SYNC_DDL</codeph> within JDBC, ODBC, or any other kind of
|
|
application that submits SQL without going through the <cmdname>impala-shell</cmdname> interpreter. See
|
|
<xref href="impala_set.xml#set"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <cmdname>impala-shell</cmdname> interpreter now reads settings from an optional configuration file,
|
|
named <filepath>$HOME/.impalarc</filepath> by default. See
|
|
<xref href="impala_shell_options.xml#shell_config_file"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li audience="hidden">
|
|
<!-- This feature will be undocumented in Impala 2.0, probably ready for prime time in 2.1. -->
|
|
<p>
|
|
The <codeph>COMPUTE STATS</codeph> statement can now gather statistics for newly added partitions
|
|
rather than the entire table. This feature is known as <term>incremental statistics</term>. See
|
|
<xref href="impala_compute_stats.xml#compute_stats"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The library used for regular expression parsing has changed from Boost to Google RE2. This
|
|
implementation change adds support for non-greedy matches using the <codeph>.*?</codeph> notation. This
|
|
and other changes in the way regular expressions are interpreted mean you might need to re-test
|
|
queries that use functions such as <codeph>regexp_extract()</codeph> or
|
|
<codeph>regexp_replace()</codeph>, or operators such as <codeph>REGEXP</codeph> or
|
|
<codeph>RLIKE</codeph>. See <xref href="impala_incompatible_changes.xml#incompatible_changes"/> for
|
|
those details.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept rev="1.4.0" id="new_features_140">
|
|
|
|
<title>New Features in <keyword keyref="impala14_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
The following are the major new features in <keyword keyref="impala14_full"/>:
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
The <codeph>DECIMAL</codeph> data type lets you store fixed-precision values, for working with currency
|
|
or other fractional values where it is important to represent values exactly and avoid rounding errors.
|
|
This feature includes enhancements to built-in functions, numeric literals, and arithmetic expressions.
|
|
<ph audience="PDF">See <xref href="impala_decimal.xml#decimal"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Where the underlying HDFS support exists, Impala can take advantage of the HDFS caching feature to <q>pin</q> entire tables or
|
|
individual partitions in memory, to speed up queries on frequently accessed data and reduce the CPU
|
|
overhead of memory-to-memory copying. When HDFS files are cached in memory, Impala can read the cached
|
|
data without any disk reads, and without making an additional copy of the data in memory. Other Hadoop
|
|
components that read the same data files also experience a performance benefit.
|
|
</p>
|
|
|
|
<p audience="PDF">
|
|
For background information about HDFS caching, see
|
|
<xref keyref="setup_hdfs_caching"/>. For performance information about using this feature with Impala, see
|
|
<xref href="impala_perf_hdfs_caching.xml#hdfs_caching"/>. For the <codeph>SET CACHED</codeph> and
|
|
<codeph>SET UNCACHED</codeph> clauses that let you control cached table data through DDL statements,
|
|
see <xref href="impala_create_table.xml#create_table"/> and
|
|
<xref href="impala_alter_table.xml#alter_table"/>.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Impala can now use Sentry-based authorization based either on the original policy file, or on rules
|
|
defined by <codeph>GRANT</codeph> and <codeph>REVOKE</codeph> statements issued through Hive.
|
|
See <xref keyref="authorization"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
For interoperability with Parquet files created through other Hadoop components, such as Pig or
|
|
MapReduce jobs, you can create an Impala table that automatically sets up the column definitions based
|
|
on the layout of an existing Parquet data file. <ph audience="PDF">See
|
|
<xref href="impala_create_table.xml#create_table"/> for the syntax, and
|
|
<xref href="impala_parquet.xml#parquet_ddl"/> for usage information.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
<codeph>ORDER BY</codeph> queries no longer require a <codeph>LIMIT</codeph> clause. If the size of the
|
|
result set to be sorted exceeds the memory available to Impala, Impala uses a temporary work space on
|
|
disk to perform the sort operation. <ph audience="PDF">See <xref href="impala_order_by.xml#order_by"/>
|
|
for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
LDAP connections can be secured through either SSL or TLS. <ph audience="PDF">See
|
|
<xref href="impala_ldap.xml#ldap"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The following new built-in scalar and aggregate functions are available:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
A new built-in function, <codeph>EXTRACT()</codeph>, returns one date or time field from a
|
|
<codeph>TIMESTAMP</codeph> value. <ph audience="PDF">See
|
|
<xref href="impala_datetime_functions.xml#datetime_functions"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
A new built-in function, <codeph>TRUNC()</codeph>, truncates date/time values to a particular
|
|
granularity, such as year, month, day, hour, and so on. <ph audience="PDF">See
|
|
<xref href="impala_datetime_functions.xml#datetime_functions"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
<codeph>ADD_MONTHS()</codeph> built-in function, an alias for the existing
|
|
<codeph>MONTHS_ADD()</codeph> function. <ph audience="PDF">See
|
|
<xref href="impala_datetime_functions.xml#datetime_functions"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
A new built-in function, <codeph>ROUND()</codeph>, rounds <codeph>DECIMAL</codeph> values to a
|
|
specified number of fractional digits. <ph audience="PDF">See
|
|
<xref href="impala_math_functions.xml#math_functions"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Several built-in aggregate functions for computing properties for statistical distributions:
|
|
<codeph>STDDEV()</codeph>, <codeph>STDDEV_SAMP()</codeph>, <codeph>STDDEV_POP()</codeph>,
|
|
<codeph>VARIANCE()</codeph>, <codeph>VARIANCE_SAMP()</codeph>, and <codeph>VARIANCE_POP()</codeph>.
|
|
<ph audience="PDF">See <xref href="impala_stddev.xml#stddev"/> and
|
|
<xref href="impala_variance.xml#variance"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Several new built-in functions, such as <codeph>MAX_INT()</codeph>,
|
|
<codeph>MIN_SMALLINT()</codeph>, and so on, let you conveniently check whether data values are in
|
|
an expected range. You might be able to switch a column to a smaller type, saving memory during
|
|
processing. <ph audience="PDF">See <xref href="impala_math_functions.xml#math_functions"/> for
|
|
details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
New built-in functions, <codeph>IS_INF()</codeph> and <codeph>IS_NAN()</codeph>, check for the
|
|
special values infinity and <q>not a number</q>. These values could be specified as
|
|
<codeph>inf</codeph> or <codeph>nan</codeph> in text data files, or be produced by certain
|
|
arithmetic expressions. <ph audience="PDF">See
|
|
<xref href="impala_math_functions.xml#math_functions"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <codeph>SHOW PARTITIONS</codeph> statement displays information about the structure of a
|
|
partitioned table. <ph audience="PDF">See <xref href="impala_show.xml#show"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li audience="hidden">
|
|
<!-- Not documenting for 1.4. Revisit in a future release. -->
|
|
<p>
|
|
Data sources. <ph audience="PDF">See <xref href="impala_data_sources.xml#data_sources"/> for
|
|
details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
New configuration options for the <cmdname>impalad</cmdname> daemon let you specify initial memory
|
|
usage for all queries. The initial resource requests handled by Llama and YARN can be expanded later if
|
|
needed, avoiding unnecessary over-allocation and reducing the chance of out-of-memory conditions.
|
|
<ph audience="PDF">See <xref href="impala_resource_management.xml#resource_management"/> for
|
|
details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
The Impala <codeph>CREATE TABLE</codeph> statement now has a <codeph>STORED AS AVRO</codeph> clause,
|
|
allowing you to create Avro tables through Impala. <ph audience="PDF">See
|
|
<xref href="impala_avro.xml#avro"/> for details and examples.</ph>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
New <cmdname>impalad</cmdname> configuration options let you fine-tune the calculations Impala makes to
|
|
estimate resource requirements for each query. These options can help avoid problems caused by
|
|
overconsumption due to too-low estimates, or underutilization due to too-high estimates.
|
|
<ph audience="PDF">See <xref href="impala_resource_management.xml#resource_management"/> for
|
|
details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
A new <codeph>SUMMARY</codeph> command in the <cmdname>impala-shell</cmdname> interpreter provides a
|
|
high-level summary of the work performed at each stage of the explain plan. The summary is also
|
|
included in output from the <codeph>PROFILE</codeph> command. <ph audience="PDF">See
|
|
<xref href="impala_shell_commands.xml#shell_commands"/> and
|
|
<xref href="impala_explain_plan.xml#perf_summary"/> for details.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Performance improvements for the <codeph>COMPUTE STATS</codeph> statement:
|
|
</p>
|
|
<ul>
|
|
<!-- This particular change has been pushed out to a later release. -->
|
|
|
|
<li audience="hidden">
|
|
Certain simple aggregation operations (with no <codeph>GROUP BY</codeph> step) are multi-threaded if
|
|
spare cores are available.
|
|
</li>
|
|
|
|
<li>
|
|
The <codeph>NDV</codeph> function is sped up through native code generation.
|
|
</li>
|
|
|
|
<li>
|
|
Because the <codeph>NULL</codeph> count is not currently used by the Impala query planner, in Impala
|
|
1.4.0 and higher, <codeph>COMPUTE STATS</codeph> does not count the <codeph>NULL</codeph> values for
|
|
each column. (The <codeph>#Nulls</codeph> field of the stats table is left as -1, signifying that the
|
|
value is unknown.)
|
|
</li>
|
|
</ul>
|
|
<p audience="PDF">
|
|
See <xref href="impala_compute_stats.xml#compute_stats"/> for general details about the <codeph>COMPUTE
|
|
STATS</codeph> statement, and <xref href="impala_perf_stats.xml#perf_stats"/> for how to use the
|
|
statistics to improve query performance.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Performance improvements for partition pruning. This feature reduces the time spent in query planning,
|
|
for partitioned tables with thousands of partitions. Previously, Impala typically queried tables with
|
|
up to approximately 3000 partitions. With the performance improvement in partition pruning, now Impala
|
|
can comfortably handle tables with tens of thousands of partitions. <ph audience="PDF">See
|
|
<xref href="impala_partitioning.xml#partition_pruning"/> for information about partition pruning.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The documentation provides additional guidance for planning tasks. <ph audience="PDF">See
|
|
<xref href="impala_planning.xml#planning"/>.</ph> <ph audience="PDF">In particular, see
|
|
<xref href="impala_cluster_sizing.xml#cluster_sizing"/> before you purchase or repurpose hardware for a
|
|
cluster to run Impala.</ph>
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <cmdname>impala-shell</cmdname> interpreter now supports UTF-8 characters for input and output. You
|
|
can control whether <cmdname>impala-shell</cmdname> ignores invalid Unicode code points through the
|
|
<codeph>--strict_unicode</codeph> option. (Note that this option is removed in Impala 2.0.)
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept rev="1.3.2" id="new_features_132">
|
|
|
|
<title>New Features in <keyword keyref="impala132"/></title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
No new features. This point release is exclusively a bug fix release for the IMPALA-1019 issue related to
|
|
HDFS caching.
|
|
</p>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept rev="1.3.1" id="new_features_131">
|
|
|
|
<title>New Features in Impala 1.3.1</title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
This point release is primarily a vehicle to deliver bug fixes. Any new features are minor changes
|
|
resulting from fixes for performance, reliability, or usability issues.
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
A new <cmdname>impalad</cmdname> startup option, <codeph>--insert_inherit_permissions</codeph>, causes
|
|
Impala <codeph>INSERT</codeph> statements to create each new partition with the same HDFS permissions
|
|
as its parent directory. By default, <codeph>INSERT</codeph> statements create directories for new
|
|
partitions using default HDFS permissions. See <xref href="impala_insert.xml#insert"/> for examples of
|
|
<codeph>INSERT</codeph> statements for partitioned tables.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <codeph>SHOW FUNCTIONS</codeph> statement now displays the return type of each function, in
|
|
addition to the types of its arguments. See <xref href="impala_show.xml#show"/> for examples.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
You can now specify the clause <codeph>FIELDS TERMINATED BY '\0'</codeph> with a <codeph>CREATE
|
|
TABLE</codeph> statement to use text data files that use ASCII 0 (<codeph>nul</codeph>) characters as a
|
|
delimiter. See <xref href="impala_txtfile.xml#txtfile"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p conref="../shared/impala_common.xml#common/regexp_matching" />
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept rev="1.3.0" id="new_features_130">
|
|
|
|
<title>New Features in <keyword keyref="impala13_full"/></title>
|
|
|
|
<conbody>
|
|
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
The admission control feature lets you control and prioritize the volume and resource consumption of
|
|
concurrent queries. This mechanism reduces spikes in resource usage, helping Impala to run alongside
|
|
other kinds of workloads on a busy cluster. It also provides more user-friendly conflict resolution
|
|
when multiple memory-intensive queries are submitted concurrently, avoiding resource contention that
|
|
formerly resulted in out-of-memory errors. See <xref href="impala_admission.xml#admission_control"/>
|
|
for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Enhanced <codeph>EXPLAIN</codeph> plans provide more detail in an easier-to-read format. Now there are
|
|
four levels of verbosity: the <codeph>EXPLAIN_LEVEL</codeph> option can be set from 0 (most concise) to
|
|
3 (most verbose). See <xref href="impala_explain.xml#explain"/> for syntax and
|
|
<xref href="impala_explain_plan.xml#explain_plan"/> for usage information.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <codeph>TIMESTAMP</codeph> data type accepts more kinds of input string formats through the
|
|
<codeph>UNIX_TIMESTAMP</codeph> function, and produces more varieties of string formats through the
|
|
<codeph>FROM_UNIXTIME</codeph> function. The documentation now also lists more functions for date
|
|
arithmetic, used for adding and subtracting <codeph>INTERVAL</codeph> expressions from
|
|
<codeph>TIMESTAMP</codeph> values. See <xref href="impala_datetime_functions.xml#datetime_functions"/>
|
|
for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
New conditional functions, <codeph>NULLIF()</codeph>, <codeph>NULLIFZERO()</codeph>, and
|
|
<codeph>ZEROIFNULL()</codeph>, simplify porting SQL containing vendor extensions to Impala. See
|
|
<xref href="impala_conditional_functions.xml#conditional_functions"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
New utility function, <codeph>CURRENT_DATABASE()</codeph>. See
|
|
<xref href="impala_misc_functions.xml#misc_functions"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Integration with the YARN resource management framework. This
|
|
feature makes use of the underlying YARN service, plus an additional service (Llama) that coordinates
|
|
requests to YARN for Impala resources, so that the Impala query only proceeds when all requested
|
|
resources are available. See <xref href="impala_resource_management.xml#resource_management"/> for full
|
|
details.
|
|
</p>
|
|
|
|
<p>
|
|
On the Impala side, this feature involves some new startup options for the <cmdname>impalad</cmdname>
|
|
daemon:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<codeph>-enable_rm</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>-llama_host</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>-llama_port</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>-llama_callback_port</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>-cgroup_hierarchy_path</codeph>
|
|
</li>
|
|
</ul>
|
|
<p>
|
|
For details of these startup options, see <xref href="impala_config_options.xml#config_options"/>.
|
|
</p>
|
|
|
|
<p>
|
|
This feature also involves several new or changed query options that you can set through the
|
|
<cmdname>impala-shell</cmdname> interpreter and apply within a specific session:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<codeph>MEM_LIMIT</codeph>: the function of this existing option changes when Impala resource
|
|
management is enabled.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>REQUEST_POOL</codeph>: a new option. (Renamed to <codeph>RESOURCE_POOL</codeph> in Impala
|
|
1.3.0.)
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>V_CPU_CORES</codeph>: a new option.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>RESERVATION_REQUEST_TIMEOUT</codeph>: a new option.
|
|
</li>
|
|
</ul>
|
|
<p>
|
|
For details of these query options, see <xref href="impala_resource_management.xml#rm_query_options"/>.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept rev="1.2.4" id="new_features_124">
|
|
|
|
<title>New Features in Impala 1.2.4</title>
|
|
|
|
<conbody>
|
|
|
|
<note>
|
|
Impala 1.2.4 is primarily a bug fix release for Impala 1.2.3, plus some performance
|
|
enhancements for the catalog server to minimize startup and DDL wait times for Impala deployments with
|
|
large numbers of databases, tables, and partitions.
|
|
</note>
|
|
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
On Impala startup, the metadata loading and synchronization mechanism has been improved and optimized,
|
|
to give more responsiveness when starting Impala on a system with a large number of databases, tables,
|
|
or partitions. The initial metadata loading happens in the background, allowing queries to be run
|
|
before the entire process is finished. When a query refers to a table whose metadata is not yet loaded,
|
|
the query waits until the metadata for that table is loaded, and the load operation for that table is
|
|
prioritized to happen first.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Formerly, if you created a new table in Hive, you had to issue the <codeph>INVALIDATE METADATA</codeph>
|
|
statement (with no table name), which was an expensive operation that reloaded metadata for all tables.
|
|
Impala did not recognize the name of the Hive-created table, so you could not do <codeph>INVALIDATE
|
|
METADATA <varname>new_table</varname></codeph> to get the metadata for just that one table. Now, when
|
|
you issue <codeph>INVALIDATE METADATA <varname>table_name</varname></codeph>, Impala checks to see if
|
|
that name represents a table created in Hive, and if so recognizes the new table and loads the metadata
|
|
for it. Additionally, if the new table is in a database that was newly created in Hive, Impala also
|
|
recognizes the new database.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
If you issue <codeph>INVALIDATE METADATA <varname>table_name</varname></codeph> and the table has been
|
|
dropped through Hive, Impala will recognize that the table no longer exists.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
New startup options let you control the parallelism of the metadata loading during startup for the
|
|
<cmdname>catalogd</cmdname> daemon:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
<codeph>--load_catalog_in_background</codeph> makes Impala load and cache metadata using background
|
|
threads after startup. It is <codeph>true</codeph> by default. Previously, a system with a large
|
|
number of databases, tables, or partitions could be unresponsive or even time out during startup.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
<codeph>--num_metadata_loading_threads</codeph> determines how much parallelism Impala devotes to
|
|
loading metadata in the background. The default is 16. You might increase this value for systems
|
|
with huge numbers of databases, tables, or partitions. You might lower this value for busy systems
|
|
that are CPU-constrained due to jobs from components other than Impala.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept rev="1.2.3" id="new_features_123">
|
|
|
|
<title>New Features in Impala 1.2.3</title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
Impala 1.2.3 contains exactly the same feature set as Impala 1.2.2. Its only difference is one additional
|
|
fix for compatibility with Parquet files generated outside of Impala by components such as Hive, Pig, or
|
|
MapReduce. If you are upgrading from Impala 1.2.1 or earlier, see
|
|
<xref href="impala_new_features.xml#new_features_122"/> for the latest added features.
|
|
</p>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept rev="1.2.2" id="new_features_122">
|
|
|
|
<title>New Features in Impala 1.2.2</title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
Impala 1.2.2 includes new features for performance, security, and flexibility. The major enhancements over
|
|
1.2.1 are performance related, primarily for join queries.
|
|
</p>
|
|
|
|
<p>
|
|
New user-visible features include:
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
Join order optimizations. This highly valuable feature automatically distributes and parallelizes the
|
|
work for a join query to minimize disk I/O and network traffic. The automatic optimization reduces the
|
|
need to use query hints or to rewrite join queries with the tables in a specific order based on size or
|
|
cardinality. The new <codeph>COMPUTE STATS</codeph> statement gathers statistical information about
|
|
each table that is crucial for enabling the join optimizations. See
|
|
<xref href="impala_perf_joins.xml#perf_joins"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
<codeph>COMPUTE STATS</codeph> statement to collect both table statistics and column statistics with a
|
|
single statement. Intended to be more comprehensive, efficient, and reliable than the corresponding
|
|
Hive <codeph>ANALYZE TABLE</codeph> statement, which collects statistics in multiple phases through
|
|
MapReduce jobs. These statistics are important for query planning for join queries, queries on
|
|
partitioned tables, and other types of data-intensive operations. For optimal planning of join queries,
|
|
you need to collect statistics for each table involved in the join. See
|
|
<xref href="impala_compute_stats.xml#compute_stats"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Reordering of tables in a join query can be overridden by the <codeph>STRAIGHT_JOIN</codeph> operator,
|
|
allowing you to fine-tune the planning of the join query if necessary, by using the original technique
|
|
of ordering the joined tables in descending order of size. See
|
|
<xref href="impala_perf_joins.xml#straight_join"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <codeph>CROSS JOIN</codeph> clause in the
|
|
<codeph><xref href="impala_select.xml#select">SELECT</xref></codeph> statement to allow Cartesian
|
|
products in queries, that is, joins without an equality comparison between columns in both tables.
|
|
Because such queries must be carefully checked to avoid accidental overconsumption of memory, you must
|
|
use the <codeph>CROSS JOIN</codeph> operator to explicitly select this kind of join. See
|
|
<xref href="impala_tutorial.xml#tut_cross_join"/> for examples.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <codeph>ALTER TABLE</codeph> statement has new clauses that let you fine-tune table statistics. You
|
|
can use this technique as a less-expensive way to update specific statistics, in case the statistics
|
|
become stale, or to experiment with the effects of different data distributions on query planning.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
LDAP username/password authentication in JDBC/ODBC. See <xref href="impala_ldap.xml#ldap"/> for
|
|
details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
<xref href="impala_string_functions.xml#string_functions/group_concat">GROUP_CONCAT()</xref> aggregate
|
|
function to concatenate column values across all rows of a result set.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <codeph>INSERT</codeph> statement now accepts hints, <codeph>[SHUFFLE]</codeph> and
|
|
<codeph>[NOSHUFFLE]</codeph>, to influence the way work is redistributed during
|
|
<codeph>INSERT...SELECT</codeph> operations. The hints are primarily useful for inserting into
|
|
partitioned Parquet tables, where using the <codeph>[SHUFFLE]</codeph> hint can avoid problems due to
|
|
memory consumption and simultaneous open files in HDFS, by collecting all the new data for each
|
|
partition on a specific node.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Several built-in functions and operators are now overloaded for more numeric data types, to reduce the
|
|
requirement to use <codeph>CAST()</codeph> for type coercion in <codeph>INSERT</codeph> statements. For
|
|
example, the expression <codeph>2+2</codeph> in an <codeph>INSERT</codeph> statement formerly produced
|
|
a <codeph>BIGINT</codeph> result, requiring a <codeph>CAST()</codeph> to be stored in an
|
|
<codeph>INT</codeph> variable. Now, addition, subtraction, and multiplication only produce a result
|
|
that is one step <q>bigger</q> than their arguments, and numeric and conditional functions can return
|
|
<codeph>SMALLINT</codeph>, <codeph>FLOAT</codeph>, and other smaller types rather than always
|
|
<codeph>BIGINT</codeph> or <codeph>DOUBLE</codeph>.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
New <codeph>fnv_hash()</codeph> built-in function for constructing hashed values. See
|
|
<xref href="impala_math_functions.xml#math_functions"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The clause <codeph>STORED AS PARQUET</codeph> is accepted as an equivalent for <codeph>STORED AS
|
|
PARQUETFILE</codeph>. This more concise form is recommended for new code.
|
|
</p>
|
|
</li>
|
|
</ul>
|
|
|
|
<p>
|
|
Because Impala 1.2.2 builds on a number of features introduced in 1.2.1, if you are upgrading from an older
|
|
1.1.x release straight to 1.2.2, also review <xref href="impala_new_features.xml#new_features_121"/> to see
|
|
features such as the <codeph>SHOW TABLE STATS</codeph> and <codeph>SHOW COLUMN STATS</codeph> statements,
|
|
and user-defined functions (UDFs).
|
|
</p>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept rev="1.2" id="new_features_121">
|
|
|
|
<title>New Features in Impala 1.2.1</title>
|
|
|
|
<conbody>
|
|
|
|
<note>
|
|
The Impala 1.2.1 feature set is a superset of features in the Impala 1.2.0 beta, with the
|
|
exception of resource management, which relies on resource management infrastructure in the
|
|
underlying Hadoop distribution.
|
|
</note>
|
|
|
|
<p>
|
|
Impala 1.2.1 includes new features for security, performance, and flexibility.
|
|
</p>
|
|
|
|
<p>
|
|
New user-visible features include:
|
|
</p>
|
|
|
|
<ul>
|
|
<li rev="1.2.1">
|
|
<p>
|
|
<codeph>SHOW TABLE STATS <varname>table_name</varname></codeph> and <codeph>SHOW COLUMN STATS
|
|
<varname>table_name</varname></codeph> statements, to verify that statistics are available and to see
|
|
the values used during query planning.
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="1.2.1">
|
|
<p>
|
|
<codeph>CREATE TABLE AS SELECT</codeph> syntax, to create a new table and transfer data into it in a
|
|
single operation.
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="1.2.1">
|
|
<p>
|
|
<codeph>OFFSET</codeph> clause, for use with the <codeph>ORDER BY</codeph> and <codeph>LIMIT</codeph>
|
|
clauses to produce <q>paged</q> result sets such as items 1-10, then 11-20, and so on.
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="1.2.1">
|
|
<p>
|
|
<codeph>NULLS FIRST</codeph> and <codeph>NULLS LAST</codeph> clauses to ensure consistent placement of
|
|
<codeph>NULL</codeph> values in <codeph>ORDER BY</codeph> queries.
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="1.2.1">
|
|
<p>
|
|
New <xref href="impala_functions.xml#builtins">built-in functions</xref>: <codeph>least()</codeph>,
|
|
<codeph>greatest()</codeph>, <codeph>initcap()</codeph>.
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="1.2.1">
|
|
<p>
|
|
New aggregate function: <codeph>ndv()</codeph>, a fast alternative to <codeph>COUNT(DISTINCT
|
|
<varname>col</varname>)</codeph> returning an approximate result.
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="1.2.1">
|
|
<p>
|
|
The <codeph>LIMIT</codeph> clause can now accept a numeric expression as an argument, rather than only
|
|
a literal constant.
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="1.2.1">
|
|
<p>
|
|
The <codeph>SHOW CREATE TABLE</codeph> statement displays the end result of all the <codeph>CREATE
|
|
TABLE</codeph> and <codeph>ALTER TABLE</codeph> statements for a particular table. You can use the
|
|
output to produce a simplified setup script for a schema.
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="1.2.1">
|
|
<p>
|
|
The <codeph>--idle_query_timeout</codeph> and <codeph>--idle_session_timeout</codeph> options for
|
|
<cmdname>impalad</cmdname> control the time intervals after which idle queries are cancelled, and idle
|
|
sessions expire. See <xref href="impala_timeouts.xml#timeouts"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
User-defined functions (UDFs). This feature lets you transform data in very flexible ways, which is
|
|
important when using Impala as part of an ETL or ELT pipeline. Prior to Impala 1.2, using UDFs required
|
|
switching into Hive. Impala 1.2 can run scalar UDFs and user-defined aggregate functions (UDAs). Impala
|
|
can run high-performance functions written in C++, or you can reuse existing Hive functions written in
|
|
Java.
|
|
</p>
|
|
|
|
<p>
|
|
You create UDFs through the <codeph>CREATE FUNCTION</codeph> statement and drop them through the
|
|
<codeph>DROP FUNCTION</codeph> statement. See <xref href="impala_udf.xml#udfs"/> for instructions about
|
|
coding, building, and deploying UDFs, and <xref href="impala_create_function.xml#create_function"/> and
|
|
<xref href="impala_drop_function.xml#drop_function"/> for related SQL syntax.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
A new service automatically propagates changes to table data and metadata made by one Impala node,
|
|
sending the new or updated metadata to all the other Impala nodes. The automatic synchronization
|
|
mechanism eliminates the need to use the <codeph>INVALIDATE METADATA</codeph> and
|
|
<codeph>REFRESH</codeph> statements after issuing Impala statements such as <codeph>CREATE
|
|
TABLE</codeph>, <codeph>ALTER TABLE</codeph>, <codeph>DROP TABLE</codeph>, <codeph>INSERT</codeph>, and
|
|
<codeph>LOAD DATA</codeph>.
|
|
</p>
|
|
|
|
<p>
|
|
For even more precise synchronization, you can enable the
|
|
<codeph><xref href="impala_sync_ddl.xml#sync_ddl">SYNC_DDL</xref></codeph> query option before issuing
|
|
a DDL, <codeph>INSERT</codeph>, or <codeph>LOAD DATA</codeph> statement. This option causes the
|
|
statement to wait, returning only after the catalog service has broadcast the applicable changes to all
|
|
Impala nodes in the cluster.
|
|
</p>
|
|
|
|
<note>
|
|
<p>
|
|
Because the catalog service only monitors operations performed through Impala, <codeph>INVALIDATE
|
|
METADATA</codeph> and <codeph>REFRESH</codeph> are still needed on the Impala side after creating new
|
|
tables or loading data through the Hive shell or by manipulating data files directly in HDFS. Because
|
|
the catalog service broadcasts the result of the <codeph>REFRESH</codeph> and <codeph>INVALIDATE
|
|
METADATA</codeph> statements to all Impala nodes, when you do need to use those statements, you can
|
|
do so a single time rather than on every Impala node.
|
|
</p>
|
|
</note>
|
|
|
|
<p>
|
|
This service is implemented by the <cmdname>catalogd</cmdname> daemon. See
|
|
<xref href="impala_components.xml#intro_catalogd"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph> statements have new clauses
|
|
<codeph>TBLPROPERTIES</codeph> and <codeph>WITH SERDEPROPERTIES</codeph>. The
|
|
<codeph>TBLPROPERTIES</codeph> clause lets you associate arbitrary items of metadata with a particular
|
|
table as key-value pairs. The <codeph>WITH SERDEPROPERTIES</codeph> clause lets you specify the
|
|
serializer/deserializer (SerDes) classes that read and write data for a table; although Impala does not
|
|
make use of these properties, sometimes particular values are needed for Hive compatibility. See
|
|
<xref href="impala_create_table.xml#create_table"/> and
|
|
<xref href="impala_alter_table.xml#alter_table"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Delegation support lets you authorize certain OS users associated with applications (for example,
|
|
<codeph>hue</codeph>) to submit requests using the credentials of other users.
|
|
See <xref href="impala_delegation.xml#delegation"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Enhancements to <codeph>EXPLAIN</codeph> output. In particular, when you enable the new
|
|
<codeph>EXPLAIN_LEVEL</codeph> query option, the <codeph>EXPLAIN</codeph> and <codeph>PROFILE</codeph>
|
|
statements produce more verbose output showing estimated resource requirements and whether table and
|
|
column statistics are available for the applicable tables and columns. See
|
|
<xref href="impala_explain.xml#explain"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="1.2.1">
|
|
<p>
|
|
<codeph>SHOW CREATE TABLE</codeph> summarizes the effects of the original <codeph>CREATE TABLE</codeph>
|
|
statement and any subsequent <codeph>ALTER TABLE</codeph> statements, giving you a <codeph>CREATE
|
|
TABLE</codeph> statement that will re-create the current structure and layout for a table.
|
|
</p>
|
|
</li>
|
|
|
|
<li rev="1.2.1">
|
|
<p>
|
|
The <codeph>LIMIT</codeph> clause for queries now accepts an arithmetic expression, in addition to
|
|
numeric literals.
|
|
</p>
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept rev="1.2" id="new_features_120">
|
|
|
|
<title>New Features in Impala 1.2.0 (Beta)</title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
The Impala 1.2.0 beta includes new features for security, performance, and flexibility.
|
|
</p>
|
|
|
|
<p>
|
|
New user-visible features include:
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
User-defined functions (UDFs). This feature lets you transform data in very flexible ways, which is
|
|
important when using Impala as part of an ETL or ELT pipeline. Prior to Impala 1.2, using UDFs required
|
|
switching into Hive. Impala 1.2 can run scalar UDFs and user-defined aggregate functions (UDAs). Impala
|
|
can run high-performance functions written in C++, or you can reuse existing Hive functions written in
|
|
Java.
|
|
</p>
|
|
|
|
<p>
|
|
You create UDFs through the <codeph>CREATE FUNCTION</codeph> statement and drop them through the
|
|
<codeph>DROP FUNCTION</codeph> statement. See <xref href="impala_udf.xml#udfs"/> for instructions about
|
|
coding, building, and deploying UDFs, and <xref href="impala_create_function.xml#create_function"/> and
|
|
<xref href="impala_drop_function.xml#drop_function"/> for related SQL syntax.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
A new service automatically propagates changes to table data and metadata made by one Impala node,
|
|
sending the new or updated metadata to all the other Impala nodes. The automatic synchronization
|
|
mechanism eliminates the need to use the <codeph>INVALIDATE METADATA</codeph> and
|
|
<codeph>REFRESH</codeph> statements after issuing Impala statements such as <codeph>CREATE
|
|
TABLE</codeph>, <codeph>ALTER TABLE</codeph>, <codeph>DROP TABLE</codeph>, <codeph>INSERT</codeph>, and
|
|
<codeph>LOAD DATA</codeph>.
|
|
</p>
|
|
|
|
<note>
|
|
<p>
|
|
Because this service only monitors operations performed through Impala, <codeph>INVALIDATE
|
|
METADATA</codeph> and <codeph>REFRESH</codeph> are still needed on the Impala side after creating new
|
|
tables or loading data through the Hive shell or by manipulating data files directly in HDFS. Because
|
|
the catalog service broadcasts the result of the <codeph>REFRESH</codeph> and <codeph>INVALIDATE
|
|
METADATA</codeph> statements to all Impala nodes, when you do need to use those statements, you can
|
|
do so a single time rather than on every Impala node.
|
|
</p>
|
|
</note>
|
|
|
|
<p>
|
|
This service is implemented by the <cmdname>catalogd</cmdname> daemon. See
|
|
<xref href="impala_components.xml#intro_catalogd"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Integration with the YARN resource management framework. This
|
|
feature makes use of the underlying YARN service, plus an additional service (Llama) that coordinates
|
|
requests to YARN for Impala resources, so that the Impala query only proceeds when all requested
|
|
resources are available. See <xref href="impala_resource_management.xml#resource_management"/> for full
|
|
details.
|
|
</p>
|
|
|
|
<p>
|
|
On the Impala side, this feature involves some new startup options for the <cmdname>impalad</cmdname>
|
|
daemon:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<codeph>-enable_rm</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>-llama_host</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>-llama_port</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>-llama_callback_port</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>-cgroup_hierarchy_path</codeph>
|
|
</li>
|
|
</ul>
|
|
<p>
|
|
For details of these startup options, see <xref href="impala_config_options.xml#config_options"/>.
|
|
</p>
|
|
|
|
<p>
|
|
This feature also involves several new or changed query options that you can set through the
|
|
<cmdname>impala-shell</cmdname> interpreter and apply within a specific session:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<codeph>MEM_LIMIT</codeph>: the function of this existing option changes when Impala resource
|
|
management is enabled.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>YARN_POOL</codeph>: a new option. (Renamed to <codeph>RESOURCE_POOL</codeph> in Impala
|
|
1.3.0.)
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>V_CPU_CORES</codeph>: a new option.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>RESERVATION_REQUEST_TIMEOUT</codeph>: a new option.
|
|
</li>
|
|
</ul>
|
|
<p>
|
|
For details of these query options, see <xref href="impala_resource_management.xml#rm_query_options"/>.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
<codeph>CREATE TABLE ... AS SELECT</codeph> syntax, to create a table and copy data into it in a single
|
|
operation. See <xref href="impala_create_table.xml#create_table"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The <codeph>CREATE TABLE</codeph> and <codeph>ALTER TABLE</codeph> statements have a new
|
|
<codeph>TBLPROPERTIES</codeph> clause that lets you associate arbitrary items of metadata with a
|
|
particular table as key-value pairs. See <xref href="impala_create_table.xml#create_table"/> and
|
|
<xref href="impala_alter_table.xml#alter_table"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Delegation support lets you authorize certain OS users associated with applications (for example,
|
|
<codeph>hue</codeph>) to submit requests using the credentials of other users.
|
|
See <xref href="impala_delegation.xml#delegation"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
Enhancements to <codeph>EXPLAIN</codeph> output. In particular, when you enable the new
|
|
<codeph>EXPLAIN_LEVEL</codeph> query option, the <codeph>EXPLAIN</codeph> and <codeph>PROFILE</codeph>
|
|
statements produce more verbose output showing estimated resource requirements and whether table and
|
|
column statistics are available for the applicable tables and columns. See
|
|
<xref href="impala_explain.xml#explain"/> for details.
|
|
</p>
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept id="new_features_111">
|
|
|
|
<title>New Features in Impala 1.1.1</title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
Impala 1.1.1 includes new features for security and stability.
|
|
</p>
|
|
|
|
<p>
|
|
New user-visible features include:
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
Additional security feature: auditing. New startup options for <cmdname>impalad</cmdname> let you capture
|
|
information about Impala queries that succeed or are blocked due to insufficient privileges. For details,
|
|
see <xref href="impala_security.xml#security"/>.
|
|
</li>
|
|
|
|
<li>
|
|
Parquet data files generated by Impala 1.1.1 are now compatible with the Parquet support in Hive. See
|
|
<xref href="impala_incompatible_changes.xml#incompatible_changes"/> for the procedure to update older
|
|
Impala-created Parquet files to be compatible with the Hive Parquet support.
|
|
</li>
|
|
|
|
<li>
|
|
Additional improvements to stability and resource utilization for Impala queries.
|
|
</li>
|
|
|
|
<li>
|
|
Additional enhancements for compatibility with existing file formats.
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept id="new_features_11">
|
|
|
|
<title>New Features in Impala 1.1</title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
Impala 1.1 includes new features for security, performance, and usability.
|
|
</p>
|
|
|
|
<p>
|
|
New user-visible features include:
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
Extensive new security features, built on top of the Sentry open source project. Impala now supports
|
|
fine-grained authorization based on roles. A policy file determines which privileges on which schema
|
|
objects (servers, databases, tables, and HDFS paths) are available to users based on their membership in
|
|
groups. By assigning privileges for views, you can control access to table data at the column level. For
|
|
details, see <xref href="impala_security.xml#security"/>.
|
|
</li>
|
|
|
|
<li>
|
|
Impala can now create, alter, drop, and query views. Views provide a flexible way to set up simple
|
|
aliases for complex queries; hide query details from applications and users; and simplify maintenance as
|
|
you rename or reorganize databases, tables, and columns. See the overview section
|
|
<xref href="impala_views.xml#views"/> and the statements
|
|
<xref href="impala_create_view.xml#create_view"/>, <xref href="impala_alter_view.xml#alter_view"/>, and
|
|
<xref href="impala_drop_view.xml#drop_view"/>.
|
|
</li>
|
|
|
|
<li>
|
|
Performance is improved through a number of automatic optimizations. Resource consumption is also reduced
|
|
for Impala queries. These improvements apply broadly across all kinds of workloads and file formats. The
|
|
major areas of performance enhancement include:
|
|
<ul>
|
|
<li>
|
|
Improved disk and thread scheduling, which applies to all queries.
|
|
</li>
|
|
|
|
<li>
|
|
Improved hash join and aggregation performance, which applies to queries with large build tables or a
|
|
large number of groups.
|
|
</li>
|
|
|
|
<li>
|
|
Dictionary encoding with Parquet, which applies to Parquet tables with short string columns.
|
|
</li>
|
|
|
|
<li>
|
|
Improved performance on systems with SSDs, which applies to all queries and file formats.
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
|
|
<li>
|
|
Some new built-in functions are implemented:
|
|
<xref href="impala_string_functions.xml#string_functions/translate">translate()</xref> to substitute
|
|
characters within strings,
|
|
<!-- IMPALA-418 -->
|
|
and <xref href="impala_misc_functions.xml#misc_functions/user">user()</xref> to check the login ID of the
|
|
connected user.
|
|
<!-- IMPALA-??? -->
|
|
</li>
|
|
|
|
<li>
|
|
The new <codeph>WITH</codeph> clause for <codeph>SELECT</codeph> statements lets you simplify complicated
|
|
queries in a way similar to creating a view. The effects of the <codeph>WITH</codeph> clause only last
|
|
for the duration of one query, unlike views, which are persistent schema objects that can be used by
|
|
multiple sessions or applications. See <xref href="impala_with.xml#with"/>.
|
|
</li>
|
|
|
|
<li>
|
|
An enhancement to the <codeph>DESCRIBE</codeph> statement, <codeph>DESCRIBE FORMATTED
|
|
<varname>table_name</varname></codeph>, displays more detailed information about the table. This
|
|
information includes the file format, location, delimiter, ownership, external or internal, creation and
|
|
access times, and partitions. The information is returned as a result set that can be interpreted and
|
|
used by a management or monitoring application. See <xref href="impala_describe.xml#describe"/>.
|
|
</li>
|
|
|
|
<li>
|
|
You can now insert a subset of columns for a table, with other columns being left as all
|
|
<codeph>NULL</codeph> values. Or you can specify the columns in any order in the destination table,
|
|
rather than having to match the order of the corresponding columns in the source query or <codeph>VALUES</codeph>
|
|
clause. This feature is known as <q>column permutation</q>. See <xref href="impala_insert.xml#insert"/>.
|
|
</li>
|
|
|
|
<li>
|
|
The new <codeph>LOAD DATA</codeph> statement lets you load data into a table directly from an HDFS data
|
|
file. This technique lets you minimize the number of steps in your ETL process, and provides more
|
|
flexibility. For example, you can bring data into an Impala table in one step. Formerly, you might have
|
|
created an external table where the data files are not entirely under your control, or copied the data
|
|
files to Impala data directories manually, or loaded the original data into one table and then used the
|
|
<codeph>INSERT</codeph> statement to copy it to a new table with a different file format, partitioning
|
|
scheme, and so on. See <xref href="impala_load_data.xml#load_data"/>.
|
|
</li>
|
|
|
|
<li>
|
|
Improvements to Impala-HBase integration:
|
|
<ul>
|
|
<li>
|
|
New query options for HBase performance:
|
|
<codeph><xref href="impala_hbase_cache_blocks.xml#hbase_cache_blocks">HBASE_CACHE_BLOCKS</xref></codeph>
|
|
and <codeph><xref href="impala_hbase_caching.xml#hbase_caching">HBASE_CACHING</xref></codeph>.
|
|
</li>
|
|
|
|
<li>
|
|
Support for binary data types in HBase tables. See <xref href="impala_hbase.xml#hbase_types"/> for
|
|
details.
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
|
|
<li>
|
|
You can issue <codeph>REFRESH</codeph> as a SQL statement through any of the programming interfaces that
|
|
Impala supports. <codeph>REFRESH</codeph> formerly had to be issued as a command through the
|
|
<cmdname>impala-shell</cmdname> interpreter, and was not available through a JDBC or ODBC API call. As
|
|
part of this change, the functionality of the <codeph>REFRESH</codeph> statement is divided between two
|
|
statements. In Impala 1.1, <codeph>REFRESH</codeph> requires a table name argument and immediately
|
|
reloads the metadata; the new <codeph>INVALIDATE METADATA</codeph> statement works the same as the Impala
|
|
1.0 <codeph>REFRESH</codeph> did: the table name argument is optional, and the metadata for one or all
|
|
tables is marked as stale, but not actually reloaded until the table is queried. When you create a new
|
|
table in the Hive shell or through a different Impala node, you must enter <codeph>INVALIDATE
|
|
METADATA</codeph> with no table parameter before you can see the new table in
|
|
<cmdname>impala-shell</cmdname>. See <xref href="impala_refresh.xml#refresh"/> and
|
|
<xref href="impala_invalidate_metadata.xml#invalidate_metadata"/>.
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept id="new_features_101">
|
|
|
|
<title>New Features in Impala 1.0.1</title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
New user-visible features include:
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
The <codeph>VALUES</codeph> clause lets you <codeph>INSERT</codeph> one or more rows using literals,
|
|
function return values, or other expressions. For performance and scalability, you should still use
|
|
<codeph>INSERT ... SELECT</codeph> for bringing large quantities of data into an Impala table. The
|
|
<codeph>VALUES</codeph> clause is a convenient way to set up small tables, particularly for initial
|
|
testing of SQL features that do not require large amounts of data. See
|
|
<xref href="impala_insert.xml#values"/> for details.
|
|
</li>
|
|
|
|
<li>
|
|
The <codeph>-B</codeph> and <codeph>-o</codeph> options of the <codeph>impala-shell</codeph> command can
|
|
turn query results into delimited text files and store them in an output file. The plain text results are
|
|
useful for use with other Hadoop components or Unix tools. In benchmark tests, it is also faster to
|
|
produce plain rather than pretty-printed results, and write to a file rather than to the screen, giving a
|
|
more accurate picture of the actual query time.
|
|
</li>
|
|
|
|
<li>
|
|
Several bug fixes. See <xref href="impala_fixed_issues.xml#fixed_issues_101"/> for details.
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept id="new_features_10">
|
|
|
|
<title>New Features in Impala 1.0</title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
This version has multiple performance improvements and adds the following functionality:
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
Several bug fixes. See <xref href="impala_fixed_issues.xml#fixed_issues_10"/>.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph><xref href="impala_alter_table.xml#alter_table">ALTER TABLE</xref></codeph> statement.
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="impala_hints.xml#hints">Hints</xref> to allow specifying a particular join strategy.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph><xref href="impala_refresh.xml#refresh">REFRESH</xref></codeph> for a single table.
|
|
</li>
|
|
|
|
<li>
|
|
Dynamic resource management, allowing high concurrency for Impala queries.
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept id="new_features_07">
|
|
|
|
<title>New Features in Version 0.7 of the Impala Beta Release</title>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
This version has multiple performance improvements and adds the following functionality:
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
Several bug fixes. See <xref href="impala_fixed_issues.xml#fixed_issues_07"/>.
|
|
</li>
|
|
|
|
<li>
|
|
Support for the Parquet file format. For more information on file formats, see
|
|
<xref href="impala_file_formats.xml#file_formats"/>.
|
|
</li>
|
|
|
|
<li>
|
|
Added support for Avro.
|
|
</li>
|
|
|
|
<li>
|
|
Support for memory limits. For more information, see the example on modifying memory limits in
|
|
<xref href="impala_config_options.xml#config_options"/>.
|
|
</li>
|
|
|
|
<li>
|
|
Bigger and faster joins through the addition of partitioned joins to the already supported broadcast
|
|
joins.
|
|
</li>
|
|
|
|
<li>
|
|
Fully distributed aggregations.
|
|
</li>
|
|
|
|
<li>
|
|
Fully distributed top-n computation.
|
|
</li>
|
|
|
|
<li>
|
|
Support for creating and altering tables.
|
|
</li>
|
|
|
|
<li>
|
|
Support for GROUP BY with floats and doubles.
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept id="new_features_06">
|
|
|
|
<title>New Features in Version 0.6 of the Impala Beta Release</title>
|
|
|
|
<conbody>
|
|
|
|
<ul>
|
|
<li>
|
|
Several bug fixes. See <xref href="impala_fixed_issues.xml#fixed_issues_06"/>.
|
|
</li>
|
|
|
|
<li>
|
|
Added support for Impala on SUSE and Debian/Ubuntu. Impala is now supported on:
|
|
<ul>
|
|
<li>
|
|
RHEL5.7/6.2 and CentOS5.7/6.2
|
|
</li>
|
|
|
|
<li>
|
|
SUSE 11 with Service Pack 1 or higher
|
|
</li>
|
|
|
|
<li>
|
|
Ubuntu 10.04/12.04 and Debian 6.03
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
|
|
<li>
|
|
Support for the RCFile file format. For more information on file formats, see
|
|
<xref href="impala_file_formats.xml#file_formats">Understanding File Formats</xref>.
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept id="new_features_05">
|
|
|
|
<title>New Features in Version 0.5 of the Impala Beta Release</title>
|
|
|
|
<conbody>
|
|
|
|
<ul>
|
|
<li>
|
|
Several bug fixes. See <xref href="impala_fixed_issues.xml#fixed_issues_05"/>.
|
|
</li>
|
|
|
|
<li>
|
|
Added support for a JDBC driver that allows you to access Impala from a Java client. To use this feature,
|
|
follow the instructions in <xref href="impala_jdbc.xml#impala_jdbc"/> to install the JDBC
|
|
driver JARs on the client machine and modify the <codeph>CLASSPATH</codeph> on the client to include the
|
|
JARs.
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept id="new_features_04">
|
|
|
|
<title>New Features in Version 0.4 of the Impala Beta Release</title>
|
|
|
|
<conbody>
|
|
|
|
<ul>
|
|
<li>
|
|
Several bug fixes. See <xref href="impala_fixed_issues.xml#fixed_issues_04"/>.
|
|
</li>
|
|
|
|
<li>
|
|
Added support for Impala on RHEL5.7/CentOS5.7. Impala is now supported on RHEL5.7/6.2 and CentOS5.7/6.2.
|
|
</li>
|
|
|
|
<li>
|
|
The Impala debug webserver now has the ability to serve static files from
|
|
<codeph>${IMPALA_HOME}/www</codeph>. This can be disabled by setting
|
|
<codeph>--enable_webserver_doc_root=false</codeph> on the command line. As a result, Impala now uses the
|
|
Twitter Bootstrap library to style its debug webpages, and the <codeph>/queries</codeph> page now tracks
|
|
the last 25 queries run by each Impala daemon.
|
|
</li>
|
|
|
|
<li>
|
|
Additional metrics available on the Impala Debug Webpage.
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept id="new_features_03">
|
|
|
|
<title>New Features in Version 0.3 of the Impala Beta Release</title>
|
|
|
|
<conbody>
|
|
|
|
<ul>
|
|
<li>
|
|
Several bug fixes. See <xref href="impala_fixed_issues.xml#fixed_issues_03"/>.
|
|
</li>
|
|
|
|
<li>
|
|
The <codeph>state-store-service</codeph> binary has been renamed <codeph>statestored</codeph>.
|
|
</li>
|
|
|
|
<li>
|
|
The location of the Impala configuration files has changed from the <codeph>/usr/lib/impala/conf</codeph>
|
|
directory to the <codeph>/etc/impala/conf</codeph> directory.
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
<concept id="new_features_02">
|
|
|
|
<title>New Features in Version 0.2 of the Impala Beta Release</title>
|
|
|
|
<conbody>
|
|
|
|
<ul>
|
|
<li>
|
|
Several bug fixes. See <xref href="impala_fixed_issues.xml#fixed_issues_02"/>.
|
|
</li>
|
|
|
|
<li>
|
|
<b>Added Default Query Options</b> Default query options override all default QueryOption values when
|
|
starting <codeph>impalad</codeph>. The format is:
|
|
<codeblock>-default_query_options='key=value;key=value'</codeblock>
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|
|
|
|
</concept>
|