mirror of
https://github.com/apache/impala.git
synced 2026-02-01 03:00:22 -05:00
- Updated the timezone section - Added the sections on customizing timezone db and aliases Change-Id: Id400cda5a1be321063d17e0ee6337e92a5da732a Reviewed-on: http://gerrit.cloudera.org:8080/11946 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Csaba Ringhofer <csringhofer@cloudera.com>
417 lines
16 KiB
XML
417 lines
16 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!--
|
|
Licensed to the Apache Software Foundation (ASF) under one
|
|
or more contributor license agreements. See the NOTICE file
|
|
distributed with this work for additional information
|
|
regarding copyright ownership. The ASF licenses this file
|
|
to you under the Apache License, Version 2.0 (the
|
|
"License"); you may not use this file except in compliance
|
|
with the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing,
|
|
software distributed under the License is distributed on an
|
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
KIND, either express or implied. See the License for the
|
|
specific language governing permissions and limitations
|
|
under the License.
|
|
-->
|
|
<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
|
|
<concept id="timestamp">
|
|
|
|
<title>TIMESTAMP Data Type</title>
|
|
|
|
<titlealts audience="PDF">
|
|
|
|
<navtitle>TIMESTAMP</navtitle>
|
|
|
|
</titlealts>
|
|
|
|
<prolog>
|
|
<metadata>
|
|
<data name="Category" value="Impala"/>
|
|
<data name="Category" value="Impala Data Types"/>
|
|
<data name="Category" value="SQL"/>
|
|
<data name="Category" value="Data Analysts"/>
|
|
<data name="Category" value="Developers"/>
|
|
<data name="Category" value="Dates and Times"/>
|
|
</metadata>
|
|
</prolog>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
The <codeph>TIMESTAMP</codeph> data type holds a value that represents a point in time.
|
|
</p>
|
|
|
|
<p>
|
|
Internally, the resolution of the time portion of a <codeph>TIMESTAMP</codeph> value is in
|
|
nanoseconds.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/syntax_blurb"/>
|
|
|
|
<p>
|
|
In the column definition of a <codeph>CREATE TABLE</codeph> statement:
|
|
</p>
|
|
|
|
<codeblock><varname>column_name</varname> TIMESTAMP
|
|
|
|
<varname>timestamp</varname> [+ | -] INTERVAL <varname>interval</varname>
|
|
DATE_ADD (<varname>timestamp</varname>, INTERVAL <varname>interval</varname> <varname>time_unit</varname>)</codeblock>
|
|
|
|
<p>
|
|
<b>Range:</b> 1400-01-01 to 9999-12-31
|
|
</p>
|
|
|
|
<p>
|
|
Out of range <codeph>TIMESTAMP</codeph> values are converted to NULL.
|
|
</p>
|
|
|
|
<p>
|
|
The range of Impala <codeph>TIMESTAMP</codeph> is different from the Hive
|
|
<codeph>TIMESTAMP</codeph> type. Refer to
|
|
<xref
|
|
href="https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-timestamp"
|
|
format="html" scope="external">Hive
|
|
documentation</xref> for detail.
|
|
</p>
|
|
|
|
<p>
|
|
<b>INTERVAL expressions:</b>
|
|
</p>
|
|
|
|
<p>
|
|
You can perform date arithmetic by adding or subtracting a specified number of time units,
|
|
using the <codeph>INTERVAL</codeph> keyword and the <codeph>+</codeph> operator, the
|
|
<codeph>-</codeph> operator, <codeph>date_add()</codeph> or <codeph>date_sub()</codeph>.
|
|
</p>
|
|
|
|
<p>
|
|
The following units are supported for <codeph><i>time_unit</i></codeph> in the
|
|
<codeph>INTERVAL</codeph> clause:
|
|
<ul>
|
|
<li>
|
|
<codeph>YEAR[S]</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>MONTH[S]</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>WEEK[S]</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>DAY[S]</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>HOUR[S]</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>MINUTE[S]</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>SECOND[S]</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>MILLISECOND[S]</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>MICROSECOND[S]</codeph>
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>NANOSECOND[S]</codeph>
|
|
</li>
|
|
</ul>
|
|
</p>
|
|
|
|
<p>
|
|
You can only specify one time unit in each interval expression, for example
|
|
<codeph>INTERVAL 3 DAYS</codeph> or <codeph>INTERVAL 25 HOURS</codeph>, but you can
|
|
produce any granularity by adding together successive <codeph>INTERVAL</codeph> values,
|
|
such as <codeph><varname>timestamp_value</varname> + INTERVAL 3 WEEKS - INTERVAL 1 DAY +
|
|
INTERVAL 10 MICROSECONDS</codeph>.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/internals_16_bytes"/>
|
|
|
|
<p>
|
|
<b>Time zones:</b>
|
|
</p>
|
|
|
|
<p>
|
|
By default, Impala stores and interprets <codeph>TIMESTAMP</codeph> values in UTC time
|
|
zone when writing to data files, reading from data files, or converting to and from system
|
|
time values through functions.
|
|
</p>
|
|
|
|
<p>
|
|
When you set the <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> startup
|
|
flag to <codeph>TRUE</codeph>, Impala treats the <codeph>TIMESTAMP</codeph> values
|
|
specified in the local time zone. The local time zone is determined in the following order
|
|
with the <codeph>TIMESTAMP</codeph> query option takes the highest precedence:
|
|
<ol>
|
|
<li>
|
|
The <codeph>TIMESTAMP</codeph> query option
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>$TZ</codeph> environment variable
|
|
</li>
|
|
|
|
<li>
|
|
System time zone where the impalad coordinator runs
|
|
</li>
|
|
</ol>
|
|
</p>
|
|
|
|
<p> The <codeph>--use_local_tz_for_unix_timestamp_conversions</codeph>
|
|
setting can be used to fix discrepancy in <codeph>INTERVAL</codeph>
|
|
operations. For example, a <codeph>TIMESTAMP + INTERVAL
|
|
<varname>n-hours</varname></codeph> can be affected by Daylight Saving
|
|
Time, which Impala does not consider by default as these operations are
|
|
applied as if the timestamp was in UTC. You can use the
|
|
<codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> setting
|
|
to fix the issue. </p>
|
|
<p>See <xref href="impala_custom_timezones.xml#custom_timezone"/> for
|
|
configuring to use custom time zone database and aliases.</p>
|
|
|
|
<p>
|
|
See <xref href="impala_datetime_functions.xml#datetime_functions">Impala Date and Time
|
|
Functions</xref> for the list of functions affected by the
|
|
<codeph>--use_local_tz_for_unix_timestamp_conversions</codeph> setting.
|
|
</p>
|
|
|
|
<p>
|
|
<b>Time zone handling between Impala and Hive:</b>
|
|
</p>
|
|
<p>Interoperability between Hive and Impala is different depending on the
|
|
file format.</p>
|
|
<ul>
|
|
<li><i>Text</i><p> For text tables, <codeph>TIMESTAMP</codeph> values can
|
|
be written and read interchangeably by Impala and Hive as Hive reads
|
|
and writes <codeph>TIMESTAMP</codeph> values without converting with
|
|
respect to time zones. </p></li>
|
|
<li><i>Parquet</i><p> When Hive writes to Parquet data files, the
|
|
<codeph>TIMESTAMP</codeph> values are normalized to UTC from the
|
|
local time zone of the host where the data was written. On the other
|
|
hand, Impala does not make any time zone adjustment when it writes or
|
|
reads <codeph>TIMESTAMP</codeph> values to Parquet files. This
|
|
difference in time zone handling can cause potentially inconsistent
|
|
results when Impala processes <codeph>TIMESTAMP</codeph> values in the
|
|
Parquet files written by Hive. </p><p> To avoid incompatibility
|
|
problems or having to code workarounds, you can specify one or both of
|
|
these impalad startup flags: <ul>
|
|
<li>
|
|
<codeph>--use_local_tz_for_unix_timestamp_conversions=true</codeph>
|
|
</li>
|
|
<li>
|
|
<codeph>--convert_legacy_hive_parquet_utc_timestamps=true</codeph>
|
|
</li>
|
|
</ul>
|
|
</p><p> When the
|
|
<codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> setting
|
|
is enabled, Impala recognizes the Parquet data files written by Hive,
|
|
and applies the same UTC-to-local-timezone conversion logic during the
|
|
query as Hive does. </p><p>In <keyword keyref="impala30"/> and lower,
|
|
this option had severe impact on multi-threaded performance. The new
|
|
time zone implementation in <keyword keyref="impala31"/> eliminated
|
|
most of the performance overhead and made Impala scale well to
|
|
multiple threads. Although
|
|
<codeph>-convert_legacy_hive_parquet_utc_timestamps</codeph> is
|
|
turned off by default for this performance reason, where practical
|
|
turn it on when processing <codeph>TIMESTAMP</codeph> columns in
|
|
Parquet files written by Hive, to avoid unexpected behavior. </p></li>
|
|
</ul>
|
|
|
|
<p>
|
|
<b>Conversions:</b>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/timestamp_conversions"
|
|
conrefend="../shared/impala_common.xml#common/cast_string_to_timestamp"/>
|
|
|
|
<p>
|
|
<ph conref="../shared/impala_common.xml#common/cast_int_to_timestamp"/>
|
|
</p>
|
|
|
|
<p>
|
|
In Impala 1.3 and higher, the <codeph>FROM_UNIXTIME()</codeph> and
|
|
<codeph>UNIX_TIMESTAMP()</codeph> functions allow a wider range of format strings, with
|
|
more flexibility in element order, repetition of letter placeholders, and separator
|
|
characters. In <keyword
|
|
keyref="impala23_full"/> and higher, the
|
|
<codeph>UNIX_TIMESTAMP()</codeph> function also allows a numeric timezone offset to be
|
|
specified as part of the input string. See
|
|
<xref
|
|
href="impala_datetime_functions.xml#datetime_functions"/> for details.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/y2k38"/>
|
|
|
|
<p>
|
|
<b>Partitioning:</b>
|
|
</p>
|
|
|
|
<p>
|
|
Although you cannot use a <codeph>TIMESTAMP</codeph> column as a partition key, you can
|
|
extract the individual years, months, days, hours, and so on and partition based on those
|
|
columns. Because the partition key column values are represented in HDFS directory names,
|
|
rather than as fields in the data files themselves, you can also keep the original
|
|
<codeph>TIMESTAMP</codeph> values if desired, without duplicating data or wasting storage
|
|
space. See <xref
|
|
href="impala_partitioning.xml#partition_key_columns"/> for more
|
|
details on partitioning with date and time values.
|
|
</p>
|
|
|
|
<codeblock>[localhost:21000] > create table timeline (event string) partitioned by (happened timestamp);
|
|
ERROR: AnalysisException: Type 'TIMESTAMP' is not supported as partition-column type in column: happened
|
|
</codeblock>
|
|
|
|
<p conref="../shared/impala_common.xml#common/null_bad_timestamp_cast"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/hbase_ok"/>
|
|
|
|
<p>
|
|
<b>Parquet consideration:</b> int96 encoded Parquet timestamps are supported in Impala.
|
|
int64 timestamps will be supported in a future release.
|
|
</p>
|
|
|
|
<p/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/parquet_ok"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/text_bulky"/>
|
|
|
|
<!-- <p conref="../shared/impala_common.xml#common/compatibility_blurb"/> -->
|
|
|
|
<p conref="../shared/impala_common.xml#common/column_stats_constant"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/sqoop_blurb"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/sqoop_timestamp_caveat"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/kudu_blurb"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/kudu_timestamp_details"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
|
|
|
|
<p>
|
|
If you cast a <codeph>STRING</codeph> with an unrecognized format to a
|
|
<codeph>TIMESTAMP</codeph>, the result is <codeph>NULL</codeph> rather than an error. Make
|
|
sure to test your data pipeline to be sure any textual date and time values are in a
|
|
format that Impala <codeph>TIMESTAMP</codeph> can recognize.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/avro_no_timestamp"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
|
|
<p>
|
|
The following examples demonstrate using <codeph>TIMESTAMP</codeph> values with built-in
|
|
functions:
|
|
</p>
|
|
|
|
<codeblock>select cast('1966-07-30' as timestamp);
|
|
select cast('1985-09-25 17:45:30.005' as timestamp);
|
|
select cast('08:30:00' as timestamp);
|
|
select hour('1970-01-01 15:30:00'); -- Succeeds, returns 15.
|
|
select hour('1970-01-01 15:30'); -- Returns NULL because seconds field required.
|
|
select hour('1970-01-01 27:30:00'); -- Returns NULL because hour value out of range.
|
|
select dayofweek('2004-06-13'); -- Returns 1, representing Sunday.
|
|
select dayname('2004-06-13'); -- Returns 'Sunday'.
|
|
select date_add('2004-06-13', 365); -- Returns 2005-06-13 with zeros for hh:mm:ss fields.
|
|
select day('2004-06-13'); -- Returns 13.
|
|
select datediff('1989-12-31','1984-09-01'); -- How many days between these 2 dates?
|
|
select now(); -- Returns current date and time in local timezone.
|
|
</codeblock>
|
|
|
|
<p>
|
|
The following examples demonstrate using <codeph>TIMESTAMP</codeph> values with
|
|
HDFS-backed tables:
|
|
</p>
|
|
|
|
<codeblock>create table dates_and_times (t timestamp);
|
|
insert into dates_and_times values
|
|
('1966-07-30'), ('1985-09-25 17:45:30.005'), ('08:30:00'), (now());
|
|
</codeblock>
|
|
|
|
<p rev="IMPALA-5137">
|
|
The following examples demonstrate using <codeph>TIMESTAMP</codeph> values with Kudu
|
|
tables:
|
|
</p>
|
|
|
|
<codeblock rev="IMPALA-5137">create table timestamp_t (x int primary key, s string, t timestamp, b bigint)
|
|
partition by hash (x) partitions 16
|
|
stored as kudu;
|
|
|
|
-- The default value of now() has microsecond precision, so the final 3 digits
|
|
-- representing nanoseconds are all zero.
|
|
insert into timestamp_t values (1, cast(now() as string), now(), unix_timestamp(now()));
|
|
|
|
-- Values with 1-499 nanoseconds are rounded down in the Kudu TIMESTAMP column.
|
|
insert into timestamp_t values (2, cast(now() + interval 100 nanoseconds as string), now() + interval 100 nanoseconds, unix_timestamp(now() + interval 100 nanoseconds));
|
|
insert into timestamp_t values (3, cast(now() + interval 499 nanoseconds as string), now() + interval 499 nanoseconds, unix_timestamp(now() + interval 499 nanoseconds));
|
|
|
|
-- Values with 500-999 nanoseconds are rounded up in the Kudu TIMESTAMP column.
|
|
insert into timestamp_t values (4, cast(now() + interval 500 nanoseconds as string), now() + interval 500 nanoseconds, unix_timestamp(now() + interval 500 nanoseconds));
|
|
insert into timestamp_t values (5, cast(now() + interval 501 nanoseconds as string), now() + interval 501 nanoseconds, unix_timestamp(now() + interval 501 nanoseconds));
|
|
|
|
-- The string representation shows how underlying Impala TIMESTAMP can have nanosecond precision.
|
|
-- The TIMESTAMP column shows how timestamps in a Kudu table are rounded to microsecond precision.
|
|
-- The BIGINT column represents seconds past the epoch and so if not affected much by nanoseconds.
|
|
select s, t, b from timestamp_t order by t;
|
|
+-------------------------------+-------------------------------+------------+
|
|
| s | t | b |
|
|
+-------------------------------+-------------------------------+------------+
|
|
| 2017-05-31 15:30:05.107157000 | 2017-05-31 15:30:05.107157000 | 1496244605 |
|
|
| 2017-05-31 15:30:28.868151100 | 2017-05-31 15:30:28.868151000 | 1496244628 |
|
|
| 2017-05-31 15:34:33.674692499 | 2017-05-31 15:34:33.674692000 | 1496244873 |
|
|
| 2017-05-31 15:35:04.769166500 | 2017-05-31 15:35:04.769167000 | 1496244904 |
|
|
| 2017-05-31 15:35:33.033082501 | 2017-05-31 15:35:33.033083000 | 1496244933 |
|
|
+-------------------------------+-------------------------------+------------+
|
|
</codeblock>
|
|
|
|
<p conref="../shared/impala_common.xml#common/added_forever"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/related_info"/>
|
|
|
|
<ul>
|
|
<li>
|
|
<!-- The Timestamp Literals topic is pretty brief. Consider adding more examples there. -->
|
|
<xref href="impala_literals.xml#timestamp_literals"/>.
|
|
</li>
|
|
|
|
<li>
|
|
To convert to or from different date formats, or perform date arithmetic, use the date
|
|
and time functions described in
|
|
<xref
|
|
href="impala_datetime_functions.xml#datetime_functions"/>. In
|
|
particular, the <codeph>from_unixtime()</codeph> function requires a case-sensitive
|
|
format string such as <codeph>"yyyy-MM-dd HH:mm:ss.SSSS"</codeph>, matching one of the
|
|
allowed variations of a <codeph>TIMESTAMP</codeph> value (date plus time, only date,
|
|
only time, optional fractional seconds).
|
|
</li>
|
|
|
|
<li>
|
|
See <xref href="impala_langref_unsupported.xml#langref_hiveql_delta"
|
|
/> for
|
|
details about differences in <codeph>TIMESTAMP</codeph> handling between Impala and
|
|
Hive.
|
|
</li>
|
|
</ul>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|