mirror of
https://github.com/apache/impala.git
synced 2025-12-19 09:58:28 -05:00
Change-Id: I7135528e84f685bfe1c32d81f4cedb6afc133e04 Reviewed-on: http://gerrit.cloudera.org:8080/13886 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Quanlong Huang <huangquanlong@gmail.com>
400 lines
11 KiB
XML
400 lines
11 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!--
|
|
Licensed to the Apache Software Foundation (ASF) under one
|
|
or more contributor license agreements. See the NOTICE file
|
|
distributed with this work for additional information
|
|
regarding copyright ownership. The ASF licenses this file
|
|
to you under the Apache License, Version 2.0 (the
|
|
"License"); you may not use this file except in compliance
|
|
with the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing,
|
|
software distributed under the License is distributed on an
|
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
KIND, either express or implied. See the License for the
|
|
specific language governing permissions and limitations
|
|
under the License.
|
|
-->
|
|
<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
|
|
<concept id="misc_functions">
|
|
|
|
<title>Impala Miscellaneous Functions</title>
|
|
|
|
<titlealts audience="PDF">
|
|
|
|
<navtitle>Miscellaneous Functions</navtitle>
|
|
|
|
</titlealts>
|
|
|
|
<prolog>
|
|
<metadata>
|
|
<data name="Category" value="Impala"/>
|
|
<data name="Category" value="Impala Functions"/>
|
|
<data name="Category" value="SQL"/>
|
|
<data name="Category" value="Data Analysts"/>
|
|
<data name="Category" value="Developers"/>
|
|
<data name="Category" value="Querying"/>
|
|
</metadata>
|
|
</prolog>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
Impala supports the following utility functions that do not operate on a particular column
|
|
or data type:
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
<xref href="#misc_functions/current_database">CURRENT_DATABASE</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#misc_functions/effective_user">EFFECTIVE_USER</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#misc_functions/get_json_object"
|
|
>GET_JSON_OBJECT</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#misc_functions/logged_in_user">LOGGED_IN_USER</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#misc_functions/pid">PID</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#misc_functions/sleep">SLEEP</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#misc_functions/user">USER</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#misc_functions/uuid">UUID</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#misc_functions/version">VERSION</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#misc_functions/coordinator">COORDINATOR</xref>
|
|
</li>
|
|
</ul>
|
|
|
|
<dl>
|
|
<dlentry rev="1.3.0" id="current_database">
|
|
|
|
<dt>
|
|
CURRENT_DATABASE()
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the database that the session is currently using, either
|
|
<codeph>default</codeph> if no database has been selected, or whatever database the
|
|
session switched to through a <codeph>USE</codeph> statement or the
|
|
<cmdname>impala-shell</cmdname> <codeph>-d</codeph> option.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="5.4.5" id="effective_user">
|
|
|
|
<dt>
|
|
EFFECTIVE_USER()
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Typically returns the same value as <codeph>USER()</codeph>. If
|
|
delegation is enabled, it returns the ID of the delegated user.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p>
|
|
<b>Added in:</b> <keyword keyref="impala225"/>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="get_json_object">
|
|
|
|
<dt>
|
|
GET_JSON_OBJECT(STRING json_str, STRING selector)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Extracts a JSON object from the <varname>json_str</varname> based on the
|
|
<varname>selector</varname> JSON path and returns the string of the extracted JSON
|
|
object.
|
|
<p>
|
|
The function returns <codeph>NULL</codeph> if the input <varname>json_str</varname>
|
|
is invalid or if nothing is selected based on the <varname>selector</varname> JSON
|
|
path.
|
|
</p>
|
|
|
|
<p>
|
|
The following characters are supported in the <varname>selector</varname> JSON path:
|
|
<ul>
|
|
<li>
|
|
$ : Denotes the root object
|
|
</li>
|
|
|
|
<li>
|
|
. : Denotes the child operator
|
|
</li>
|
|
|
|
<li>
|
|
[] : Denotes the subscript operator for array
|
|
</li>
|
|
|
|
<li>
|
|
* : Denotes the wildcard for [] or .
|
|
</li>
|
|
</ul>
|
|
</p>
|
|
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p>
|
|
<b>Examples:</b>
|
|
</p>
|
|
<codeblock>---- QUERY
|
|
SELECT GET_JSON_OBJECT ('{"a":true, "b":false, "c":true}', '$.*');
|
|
---- RESULTS
|
|
[true,false,true]</codeblock>
|
|
<codeblock>---- QUERY
|
|
SELECT GET_JSON_OBJECT(t.json, '$.a.b.c') FROM (VALUES (
|
|
('{"a": {"b": {"c": 1}}}' AS json),
|
|
('{"a": {"b": {"c": 2}}}'),
|
|
('{"a": {"b": {"c": 3}}}')
|
|
)) t
|
|
---- RESULTS
|
|
'1'
|
|
'2'
|
|
'3'</codeblock>
|
|
<codeblock>---- QUERY
|
|
SELECT GET_JSON_OBJECT(t.json, '$.a'),
|
|
GET_JSON_OBJECT(t.json, '$.b'),
|
|
GET_JSON_OBJECT(t.json, '$.c')
|
|
FROM (VALUES (
|
|
('{"a":1, "b":2, "c":3}' AS json),
|
|
('{"b":2, "c":3}'),
|
|
('{"c":3}')
|
|
)) t
|
|
---- RESULTS
|
|
'1','2','3'
|
|
'NULL','2','3'
|
|
'NULL','NULL','3'</codeblock>
|
|
<codeblock>---- QUERY
|
|
SELECT GET_JSON_OBJECT(t.json, '$[1]'),
|
|
GET_JSON_OBJECT(t.json, '$[*]')
|
|
FROM (VALUES (
|
|
('["a", "b", "c"]' AS json),
|
|
('["a", "b"]'),
|
|
('["a"]')
|
|
)) t
|
|
---- RESULTS
|
|
'b','["a","b","c"]'
|
|
'b','["a","b"]'
|
|
'NULL','a'</codeblock>
|
|
<p>
|
|
<b>Added in:</b> <keyword keyref="impala31"/>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="3.1" id="logged_in_user">
|
|
|
|
<dt>
|
|
LOGGED_IN_USER()
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Typically returns the same value as <codeph>USER()</codeph>. If
|
|
delegation is enabled, it returns the ID of the delegated user.
|
|
<p>
|
|
<codeph>LOGGED_IN_USER()</codeph> is an alias of <codeph>EFFECTIVE_USER()</codeph>.
|
|
</p>
|
|
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p>
|
|
<b>Added in:</b> <keyword keyref="impala31"/>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="1.3.0" id="pid">
|
|
|
|
<dt>
|
|
PID()
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the process ID of the <cmdname>impalad</cmdname> daemon that
|
|
the session is connected to. You can use it during low-level debugging, to issue Linux
|
|
commands that trace, show the arguments, and so on for the <cmdname>impalad</cmdname>
|
|
process.
|
|
<p>
|
|
<b>Return type:</b> <codeph>INT</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="sleep">
|
|
|
|
<dt>
|
|
SLEEP(INT ms)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Pauses the query for a specified number of milliseconds. Useful for slowing
|
|
down queries with small result sets enough to monitor runtime execution, memory usage,
|
|
or other factors that otherwise would be difficult to capture during the brief
|
|
interval of query execution. When used in the <codeph>SELECT</codeph> list, it is
|
|
called once for each row in the result set; adjust the number of milliseconds
|
|
accordingly. For example, a query <codeph>SELECT *, SLEEP(5) FROM
|
|
table_with_1000_rows</codeph> would take at least 5 seconds to complete (5
|
|
milliseconds * 1000 rows in result set). To avoid an excessive number of concurrent
|
|
queries, use this function for troubleshooting on test and development systems, not
|
|
for production queries.
|
|
<p>
|
|
<b>Return type:</b> N/A
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="1.1" id="user">
|
|
|
|
<dt>
|
|
USER()
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the username of the Linux user who is connected to the
|
|
<cmdname>impalad</cmdname> daemon. Typically called a single time, in a query without
|
|
any <codeph>FROM</codeph> clause, to understand how authorization settings apply in a
|
|
security context; once you know the logged-in username, you can check which groups
|
|
that user belongs to, and from the list of groups you can check which roles are
|
|
available to those groups through the authorization policy file.
|
|
<p conref="../shared/impala_common.xml#common/user_kerberized"/>
|
|
|
|
<p>
|
|
When delegation is enabled, consider calling the <codeph>EFFECTIVE_USER()</codeph>
|
|
function instead.
|
|
</p>
|
|
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="2.5.0 IMPALA-1477" id="uuid">
|
|
|
|
<dt>
|
|
UUID()
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns a
|
|
<xref
|
|
href="https://en.wikipedia.org/wiki/Universally_unique_identifier"
|
|
scope="external" format="html">universally
|
|
unique identifier</xref>, a 128-bit value encoded as a string with groups of
|
|
hexadecimal digits separated by dashes.
|
|
<p>
|
|
Each call to <codeph>UUID()</codeph> produces a new arbitrary value.
|
|
</p>
|
|
|
|
<p>
|
|
If you get a UUID for each row of a result set, you can use it as a unique
|
|
identifier within a table, or even a unique ID across tables.
|
|
</p>
|
|
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/added_in_250"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
|
|
|
|
<p>
|
|
Ascending numeric sequences of type <codeph>BIGINT</codeph> are often used as
|
|
identifiers within a table, and as join keys across multiple tables. The
|
|
<codeph>UUID()</codeph> value is a convenient alternative that does not require
|
|
storing or querying the highest sequence number. For example, you can use it to
|
|
quickly construct new unique identifiers during a data import job, or to combine
|
|
data from different tables without the likelihood of ID collisions.
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="version">
|
|
|
|
<dt>
|
|
VERSION()
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns information such as the precise version number and build date
|
|
for the <cmdname>impalad</cmdname> daemon that you are currently connected to. Typically
|
|
used to confirm that you are connected to the expected level of Impala to use a
|
|
particular feature, or to connect to several nodes and confirm they are all running
|
|
the same level of <cmdname>impalad</cmdname>.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph> (with one or more embedded newlines)
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="coordinator" rev="IMPALA-589">
|
|
|
|
<dt>
|
|
COORDINATOR()
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the name of the host which is running the
|
|
<cmdname>impalad</cmdname> daemon that is acting as the <codeph>coordinator</codeph> for
|
|
the current query.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p>
|
|
<b>Added in:</b> <keyword keyref="impala31"/>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
</dl>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|