mirror of
https://github.com/apache/impala.git
synced 2026-01-26 03:01:30 -05:00
- The function titles were changed to upper case. - The function titles no longer use <codeph>. <codeph> font appears smaller than the <p> font. - Return type were changed to upper case data types. - Minor typos were fixed, such as extra commas and periods in titles. - The indexterm dita elememts were removed. Indexterm was incomplete and WIP. No plan to go ahead and implement it, so removed. Change-Id: I797532463da8d29fe5bc7543cfdfb5b2b82db197 Reviewed-on: http://gerrit.cloudera.org:8080/11619 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Michael Brown <mikeb@cloudera.com>
1591 lines
51 KiB
XML
1591 lines
51 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!--
|
|
Licensed to the Apache Software Foundation (ASF) under one
|
|
or more contributor license agreements. See the NOTICE file
|
|
distributed with this work for additional information
|
|
regarding copyright ownership. The ASF licenses this file
|
|
to you under the Apache License, Version 2.0 (the
|
|
"License"); you may not use this file except in compliance
|
|
with the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing,
|
|
software distributed under the License is distributed on an
|
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
KIND, either express or implied. See the License for the
|
|
specific language governing permissions and limitations
|
|
under the License.
|
|
-->
|
|
<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
|
|
<concept id="string_functions">
|
|
|
|
<title>Impala String Functions</title>
|
|
|
|
<titlealts audience="PDF">
|
|
|
|
<navtitle>String Functions</navtitle>
|
|
|
|
</titlealts>
|
|
|
|
<prolog>
|
|
<metadata>
|
|
<data name="Category" value="Impala"/>
|
|
<data name="Category" value="Impala Functions"/>
|
|
<data name="Category" value="SQL"/>
|
|
<data name="Category" value="Data Analysts"/>
|
|
<data name="Category" value="Developers"/>
|
|
<data name="Category" value="Querying"/>
|
|
</metadata>
|
|
</prolog>
|
|
|
|
<conbody>
|
|
|
|
<p rev="2.0.0">
|
|
String functions are classified as those primarily accepting or returning
|
|
<codeph>STRING</codeph>, <codeph>VARCHAR</codeph>, or <codeph>CHAR</codeph> data types,
|
|
for example to measure the length of a string or concatenate two strings together.
|
|
<ul>
|
|
<li>
|
|
All the functions that accept <codeph>STRING</codeph> arguments also accept the
|
|
<codeph>VARCHAR</codeph> and <codeph>CHAR</codeph> types introduced in Impala 2.0.
|
|
</li>
|
|
|
|
<li>
|
|
Whenever <codeph>VARCHAR</codeph> or <codeph>CHAR</codeph> values are passed to a
|
|
function that returns a string value, the return type is normalized to
|
|
<codeph>STRING</codeph>. For example, a call to <codeph>CONCAT()</codeph> with a mix
|
|
of <codeph>STRING</codeph>, <codeph>VARCHAR</codeph>, and <codeph>CHAR</codeph>
|
|
arguments produces a <codeph>STRING</codeph> result.
|
|
</li>
|
|
</ul>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/related_info"/>
|
|
|
|
<p>
|
|
The string functions operate mainly on these data types:
|
|
<xref href="impala_string.xml#string"/>, <xref href="impala_varchar.xml#varchar"/>, and
|
|
<xref href="impala_char.xml#char"/>.
|
|
</p>
|
|
|
|
<p>
|
|
<b>Function reference:</b>
|
|
</p>
|
|
|
|
<p>
|
|
Impala supports the following string functions:
|
|
</p>
|
|
|
|
<ul>
|
|
<li>
|
|
<xref href="#string_functions/ascii">ASCII</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/base64decode">BASE64DECODE</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/base64encode">BASE64ENCODE</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/btrim">BTRIM</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/char_length">CHAR_LENGTH</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/chr">CHR</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/concat">CONCAT</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/concat_ws">CONCAT_WS</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/find_in_set">FIND_IN_SET</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/group_concat">GROUP_CONCAT</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/initcap">INITCAP</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/instr">INSTR</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/left">LEFT</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/length">LENGTH</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/locate">LOCATE</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/lower">LOWER, LCASE</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/lpad">LPAD</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/ltrim">LTRI </xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/parse_url">PARSE_URL</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/regexp_escape">REGEXP_ESCAPE</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/regexp_extract">REGEXP_EXTRACT</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/regexp_like">REGEXP_LIKE</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/regexp_replace">REGEXP_REPLACE</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/repeat">REPEAT</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/replace">REPLACE</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/reverse">REVERSE</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/right">RIGHT</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/rpad">RPAD</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/rtrim">RTRIM</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/space">SPACE</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/split_part">SPLIT_PART</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/strleft">STRLEFT</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/strright">STRRIGHT</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/substr">SUBSTR, SUBSTRING</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/translate">TRANSLATE</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/trim">TRIM</xref>
|
|
</li>
|
|
|
|
<li>
|
|
<xref href="#string_functions/upper">UPPER, UCASE</xref>
|
|
</li>
|
|
</ul>
|
|
|
|
<dl>
|
|
<dlentry id="ascii">
|
|
|
|
<dt>
|
|
ASCII(STRING str)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the numeric ASCII code of the first character of the argument.
|
|
<p>
|
|
<b>Return type:</b> <codeph>INT</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="base64decode" rev="2.6.0 IMPALA-2107">
|
|
|
|
<dt>
|
|
BASE64DECODE(STRING str)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b>
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
|
|
|
|
<p>
|
|
For general information about Base64 encoding, see
|
|
<xref
|
|
keyref="base64"/>.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/base64_use_cases"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/base64_charset"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/base64_alignment"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/base64_error_handling"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/base64_examples"/>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="base64encode" rev="2.6.0 IMPALA-2107">
|
|
|
|
<dt>
|
|
BASE64ENCODE(STRING str)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b>
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
|
|
|
|
<p>
|
|
For general information about Base64 encoding, see
|
|
<xref
|
|
keyref="base64"/>.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/base64_use_cases"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/base64_charset"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/base64_alignment"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/base64_examples"/>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="2.3.0" id="btrim">
|
|
|
|
<dt>
|
|
BTRIM(STRING a), BTRIM(STRING a, STRING chars_to_trim)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Removes all instances of one or more characters from the start and end
|
|
of a <codeph>STRING</codeph> value. By default, removes only spaces. If a
|
|
non-<codeph>NULL</codeph> optional second argument is specified, the function removes
|
|
all occurrences of characters in that second argument from the beginning and end of
|
|
the string.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/added_in_230"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
|
|
<p>
|
|
The following examples show the default <codeph>BTRIM()</codeph> behavior, and what
|
|
changes when you specify the optional second argument. All the examples bracket the
|
|
output value with <codeph>[ ]</codeph> so that you can see any leading or trailing
|
|
spaces in the <codeph>BTRIM()</codeph> result. By default, the function removes and
|
|
number of both leading and trailing spaces. When the second argument is specified,
|
|
any number of occurrences of any character in the second argument are removed from
|
|
the start and end of the input string; in this case, spaces are not removed (unless
|
|
they are part of the second argument) and any instances of the characters are not
|
|
removed if they do not come right at the beginning or end of the string.
|
|
</p>
|
|
<codeblock>-- Remove multiple spaces before and one space after.
|
|
select concat('[',btrim(' hello '),']');
|
|
+---------------------------------------+
|
|
| concat('[', btrim(' hello '), ']') |
|
|
+---------------------------------------+
|
|
| [hello] |
|
|
+---------------------------------------+
|
|
|
|
-- Remove any instances of x or y or z at beginning or end. Leave spaces alone.
|
|
select concat('[',btrim('xy hello zyzzxx','xyz'),']');
|
|
+------------------------------------------------------+
|
|
| concat('[', btrim('xy hello zyzzxx', 'xyz'), ']') |
|
|
+------------------------------------------------------+
|
|
| [ hello ] |
|
|
+------------------------------------------------------+
|
|
|
|
-- Remove any instances of x or y or z at beginning or end.
|
|
-- Leave x, y, z alone in the middle of the string.
|
|
select concat('[',btrim('xyhelxyzlozyzzxx','xyz'),']');
|
|
+----------------------------------------------------+
|
|
| concat('[', btrim('xyhelxyzlozyzzxx', 'xyz'), ']') |
|
|
+----------------------------------------------------+
|
|
| [helxyzlo] |
|
|
+----------------------------------------------------+
|
|
</codeblock>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="1.3.0" id="char_length">
|
|
|
|
<dt>
|
|
CHAR_LENGTH(STRING a), CHARACTER_LENGTH(STRING a)
|
|
</dt>
|
|
|
|
<dd rev="IMPALA-6391 IMPALA-2172">
|
|
<b>Purpose:</b> Returns the length in characters of the argument string, including any
|
|
trailing spaces that pad a <codeph>CHAR</codeph> value.
|
|
<p>
|
|
<b>Return type:</b> <codeph>INT</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
|
|
|
|
<p>
|
|
When applied to a <codeph>STRING</codeph> value, it returns the same result as the
|
|
<codeph>length()</codeph> function. When applied to a <codeph>CHAR</codeph> value,
|
|
it might return a larger value than <codeph>length()</codeph> does, to account for
|
|
trailing spaces in the <codeph>CHAR</codeph>.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/length_demo"/>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="2.3.0" id="chr">
|
|
|
|
<dt>
|
|
CHR(INT character_code)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns a character specified by a decimal code point value. The
|
|
interpretation and display of the resulting character depends on your system locale.
|
|
Because consistent processing of Impala string values is only guaranteed for values
|
|
within the ASCII range, only use this function for values corresponding to ASCII
|
|
characters. In particular, parameter values greater than 255 return an empty string.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p>
|
|
<b>Usage notes:</b> Can be used as the inverse of the <codeph>ascii()</codeph>
|
|
function, which converts a character to its numeric ASCII code.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/added_in_230"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
<codeblock>SELECT chr(65);
|
|
+---------+
|
|
| chr(65) |
|
|
+---------+
|
|
| A |
|
|
+---------+
|
|
|
|
SELECT chr(97);
|
|
+---------+
|
|
| chr(97) |
|
|
+---------+
|
|
| a |
|
|
+---------+
|
|
</codeblock>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="concat">
|
|
|
|
<dt>
|
|
CONCAT(STRING a, STRING b...)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns a single string representing all the argument values joined
|
|
together.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/concat_blurb"/>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="concat_ws">
|
|
|
|
<dt>
|
|
CONCAT_WS(STRING sep, STRING a, STRING b...)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns a single string representing the second and following argument
|
|
values joined together, delimited by a specified separator.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/concat_blurb"/>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="find_in_set">
|
|
|
|
<dt>
|
|
FIND_IN_SEt(STRING str, STRING strList)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the position (starting from 1) of the first occurrence of a
|
|
specified string within a comma-separated string. Returns <codeph>NULL</codeph> if
|
|
either argument is <codeph>NULL</codeph>, 0 if the search string is not found, or 0 if
|
|
the search string contains a comma.
|
|
<p>
|
|
<b>Return type:</b> <codeph>INT</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="1.2" id="group_concat">
|
|
|
|
<dt>
|
|
GROUP_CONCAT(STRING s [, STRING sep])
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns a single string representing the argument value concatenated
|
|
together for each row of the result set. If the optional separator string is
|
|
specified, the separator is added between each pair of concatenated values.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/concat_blurb"/>
|
|
|
|
<p>
|
|
By default, returns a single string covering the whole result set. To include other
|
|
columns or values in the result set, or to produce multiple concatenated strings for
|
|
subsets of rows, include a <codeph>GROUP BY</codeph> clause in the query.
|
|
</p>
|
|
|
|
<p>
|
|
Strictly speaking, <codeph>GROUP_CONCAT()</codeph> is an aggregate function, not a
|
|
scalar function like the others in this list. For additional details and examples,
|
|
see <xref
|
|
href="impala_group_concat.xml#group_concat"/>.
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="1.2" id="initcap">
|
|
|
|
<dt>
|
|
INITCAP(STRING str)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the input string with the first letter capitalized.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="instr">
|
|
|
|
<dt>
|
|
INSTR(STRING str, STRING substr <ph rev="IMPALA-3973">[, BIGINT position [, BIGINT
|
|
occurrence ] ]</ph>)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the position (starting from 1) of the first occurrence of a
|
|
substring within a longer string.
|
|
<p>
|
|
<b>Return type:</b> <codeph>INT</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
|
|
|
|
<p>
|
|
If the substring is not present in the string, the function returns 0:
|
|
</p>
|
|
<codeblock rev="IMPALA-3973 2.8.0">
|
|
select instr('foo bar bletch', 'z');
|
|
+------------------------------+
|
|
| instr('foo bar bletch', 'z') |
|
|
+------------------------------+
|
|
| 0 |
|
|
+------------------------------+
|
|
</codeblock>
|
|
<p rev="IMPALA-3973 2.8.0">
|
|
The optional third and fourth arguments let you find instances of the substring
|
|
other than the first instance starting from the left:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<p>
|
|
The third argument lets you specify a starting point within the string other
|
|
than 1:
|
|
</p>
|
|
<codeblock>
|
|
-- Restricting the search to positions 7..end,
|
|
-- the first occurrence of 'b' is at position 9.
|
|
select instr('foo bar bletch', 'b', 7);
|
|
+---------------------------------+
|
|
| instr('foo bar bletch', 'b', 7) |
|
|
+---------------------------------+
|
|
| 9 |
|
|
+---------------------------------+
|
|
|
|
-- If there are no more occurrences after the
|
|
-- specified position, the result is 0.
|
|
select instr('foo bar bletch', 'b', 10);
|
|
+----------------------------------+
|
|
| instr('foo bar bletch', 'b', 10) |
|
|
+----------------------------------+
|
|
| 0 |
|
|
+----------------------------------+
|
|
</codeblock>
|
|
<p>
|
|
If the third argument is negative, the search works right-to-left starting that
|
|
many characters from the right. The return value still represents the position
|
|
starting from the left side of the string.
|
|
</p>
|
|
<codeblock rev="IMPALA-3973 2.8.0">
|
|
-- Scanning right to left, the first occurrence of 'o'
|
|
-- is at position 8. (8th character from the left.)
|
|
select instr('hello world','o',-1);
|
|
+-------------------------------+
|
|
| instr('hello world', 'o', -1) |
|
|
+-------------------------------+
|
|
| 8 |
|
|
+-------------------------------+
|
|
|
|
-- Scanning right to left, starting from the 6th character
|
|
-- from the right, the first occurrence of 'o' is at
|
|
-- position 5 (5th character from the left).
|
|
select instr('hello world','o',-6);
|
|
+-------------------------------+
|
|
| instr('hello world', 'o', -6) |
|
|
+-------------------------------+
|
|
| 5 |
|
|
+-------------------------------+
|
|
|
|
-- If there are no more occurrences after the
|
|
-- specified position, the result is 0.
|
|
select instr('hello world','o',-10);
|
|
+--------------------------------+
|
|
| instr('hello world', 'o', -10) |
|
|
+--------------------------------+
|
|
| 0 |
|
|
+--------------------------------+
|
|
</codeblock>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
The fourth argument lets you specify an occurrence other than the first:
|
|
</p>
|
|
<codeblock rev="IMPALA-3973 2.8.0">
|
|
-- 2nd occurrence of 'b' is at position 9.
|
|
select instr('foo bar bletch', 'b', 1, 2);
|
|
+------------------------------------+
|
|
| instr('foo bar bletch', 'b', 1, 2) |
|
|
+------------------------------------+
|
|
| 9 |
|
|
+------------------------------------+
|
|
|
|
-- Negative position argument means scan right-to-left.
|
|
-- This example finds second instance of 'b' from the right.
|
|
select instr('foo bar bletch', 'b', -1, 2);
|
|
+-------------------------------------+
|
|
| instr('foo bar bletch', 'b', -1, 2) |
|
|
+-------------------------------------+
|
|
| 5 |
|
|
+-------------------------------------+
|
|
</codeblock>
|
|
<p>
|
|
If the fourth argument is greater than the number of matching occurrences, the
|
|
function returns 0:
|
|
</p>
|
|
<codeblock>
|
|
-- There is no 3rd occurrence within the string.
|
|
select instr('foo bar bletch', 'b', 1, 3);
|
|
+------------------------------------+
|
|
| instr('foo bar bletch', 'b', 1, 3) |
|
|
+------------------------------------+
|
|
| 0 |
|
|
+------------------------------------+
|
|
|
|
-- There is not even 1 occurrence when scanning
|
|
-- the string starting at position 10.
|
|
select instr('foo bar bletch', 'b', 10, 1);
|
|
+-------------------------------------+
|
|
| instr('foo bar bletch', 'b', 10, 1) |
|
|
+-------------------------------------+
|
|
| 0 |
|
|
+-------------------------------------+
|
|
</codeblock>
|
|
<p>
|
|
The fourth argument cannot be negative or zero. A non-positive value for this
|
|
argument causes an error:
|
|
</p>
|
|
<codeblock>
|
|
select instr('foo bar bletch', 'b', 1, 0);
|
|
ERROR: UDF ERROR: Invalid occurrence parameter to instr function: 0
|
|
|
|
select instr('aaaaaaaaa','aa', 1, -1);
|
|
ERROR: UDF ERROR: Invalid occurrence parameter to instr function: -1
|
|
</codeblock>
|
|
</li>
|
|
|
|
<li>
|
|
<p>
|
|
If either of the optional arguments is <codeph>NULL</codeph>, the function also
|
|
returns <codeph>NULL</codeph>:
|
|
</p>
|
|
<codeblock>
|
|
select instr('foo bar bletch', 'b', null);
|
|
+------------------------------------+
|
|
| instr('foo bar bletch', 'b', null) |
|
|
+------------------------------------+
|
|
| NULL |
|
|
+------------------------------------+
|
|
|
|
select instr('foo bar bletch', 'b', 1, null);
|
|
+---------------------------------------+
|
|
| instr('foo bar bletch', 'b', 1, null) |
|
|
+---------------------------------------+
|
|
| NULL |
|
|
+---------------------------------------+
|
|
</codeblock>
|
|
</li>
|
|
</ul>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="left">
|
|
|
|
<dt>
|
|
LEFT(STRING a, INT num_chars)
|
|
</dt>
|
|
|
|
<dd>
|
|
See the <codeph>STRLEFT()</codeph> function.
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="length">
|
|
|
|
<dt>
|
|
LENGTH(STRING a)
|
|
</dt>
|
|
|
|
<dd rev="IMPALA-6391 IMPALA-2172">
|
|
<b>Purpose:</b> Returns the length in characters of the argument string, ignoring any
|
|
trailing spaces in <codeph>CHAR</codeph> values.
|
|
<p>
|
|
<b>Return type:</b> <codeph>INT</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
|
|
|
|
<p>
|
|
When applied to a <codeph>STRING</codeph> value, it returns the same result as the
|
|
<codeph>CHAR_LENGTH()</codeph> function. When applied to a <codeph>CHAR</codeph>
|
|
value, it might return a smaller value than <codeph>CHAR_LENGTH()</codeph> does,
|
|
because <codeph>LENGTH()</codeph> ignores any trailing spaces in the
|
|
<codeph>CHAR</codeph>.
|
|
</p>
|
|
|
|
<note>
|
|
Because the behavior of <codeph>LENGTH()</codeph> with <codeph>CHAR</codeph> values
|
|
containing trailing spaces is not standardized across the industry, when porting
|
|
code from other database systems, evaluate the behavior of <codeph>LENGTH()</codeph>
|
|
on the source system and switch to <codeph>CHAR_LENGTH()</codeph> for Impala if
|
|
necessary.
|
|
</note>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/length_demo"/>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="locate">
|
|
|
|
<dt>
|
|
LOCATE(STRING substr, STRING str[, INT pos])
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the position (starting from 1) of the first occurrence of a
|
|
substring within a longer string, optionally after a particular position.
|
|
<p>
|
|
<b>Return type:</b> <codeph>INT</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="lower">
|
|
|
|
<dt>
|
|
LOWER(STRING a), <ph id="lcase">LCASE(STRING a)</ph>
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the argument string converted to all-lowercase.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
|
|
|
|
<p
|
|
conref="../shared/impala_common.xml#common/case_insensitive_comparisons_tip"
|
|
/>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="lpad">
|
|
|
|
<dt>
|
|
LPAD(STRING str, INT len, STRING pad)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns a string of a specified length, based on the first argument
|
|
string. If the specified string is too short, it is padded on the left with a
|
|
repeating sequence of the characters from the pad string. If the specified string is
|
|
too long, it is truncated on the right.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="ltrim">
|
|
|
|
<dt>
|
|
LTRIM(STRING a [, STRING chars_to_trim])
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the argument string with all occurrences of characters
|
|
specified by the second argument removed from the left side. Removes spaces if the
|
|
second argument is not specified.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="parse_url">
|
|
|
|
<dt>
|
|
PARSE_URL(STRING urlString, STRING partToExtract [, STRING keyToExtract])
|
|
</dt>
|
|
|
|
<dd>
|
|
<indexterm audience="hidden">parse_url() function</indexterm>
|
|
<b>Purpose:</b> Returns the portion of a URL corresponding to a specified part. The
|
|
part argument can be <codeph>'PROTOCOL'</codeph>, <codeph>'HOST'</codeph>,
|
|
<codeph>'PATH'</codeph>, <codeph>'REF'</codeph>, <codeph>'AUTHORITY'</codeph>,
|
|
<codeph>'FILE'</codeph>, <codeph>'USERINFO'</codeph>, or <codeph>'QUERY'</codeph>.
|
|
Uppercase is required for these literal values. When requesting the
|
|
<codeph>QUERY</codeph> portion of the URL, you can optionally specify a key to
|
|
retrieve just the associated value from the key-value pairs in the query string.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p>
|
|
<b>Usage notes:</b> This function is important for the traditional Hadoop use case
|
|
of interpreting web logs. For example, if the web traffic data features raw URLs not
|
|
divided into separate table columns, you can count visitors to a particular page by
|
|
extracting the <codeph>'PATH'</codeph> or <codeph>'FILE'</codeph> field, or analyze
|
|
search terms by extracting the corresponding key from the <codeph>'QUERY'</codeph>
|
|
field.
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="3.0" id="regexp_escape">
|
|
|
|
<dt>
|
|
REGEXP_ESCAPE(STRING source)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> The <codeph>REGEXP_ESCAPE()</codeph> function returns a string escaped
|
|
for the special character in RE2 library so that the special characters are
|
|
interpreted literally rather than as special characters. The following special
|
|
characters are escaped by the function:
|
|
<codeblock><![CDATA[.\+*?[^]$(){}=!<>|:-]]></codeblock>
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_re2"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_re2_warning"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_escapes"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
|
|
<p>
|
|
This example shows escaping one of special characters in RE2.
|
|
</p>
|
|
<codeblock>
|
|
+------------------------------------------------------+
|
|
| regexp_escape('Hello.world') |
|
|
+------------------------------------------------------+
|
|
| Hello\.world |
|
|
+------------------------------------------------------+
|
|
</codeblock>
|
|
<p>
|
|
This example shows escaping all the special characters in RE2.
|
|
</p>
|
|
<codeblock><![CDATA[
|
|
+------------------------------------------------------------+
|
|
| regexp_escape('a.b\\c+d*e?f[g]h$i(j)k{l}m=n!o<p>q|r:s-t') |
|
|
+------------------------------------------------------------+
|
|
| a\.b\\c\+d\*e\?f\[g\]h\$i\(j\)k\{l\}m\=n\!o\<p\>q\|r\:s\-t |
|
|
+------------------------------------------------------------+
|
|
]]>
|
|
</codeblock>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="regexp_extract">
|
|
|
|
<dt>
|
|
REGEXP_EXTRACT(STRING subject, STRING pattern, INT index)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the specified () group from a string based on a regular
|
|
expression pattern. Group 0 refers to the entire extracted string, while group 1, 2,
|
|
and so on refers to the first, second, and so on <codeph>(...)</codeph> portion.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_re2"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_re2_warning"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_escapes"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
|
|
<p>
|
|
This example shows how group 0 matches the full pattern string, including the
|
|
portion outside any <codeph>()</codeph> group:
|
|
</p>
|
|
<codeblock>[localhost:21000] > select regexp_extract('abcdef123ghi456jkl','.*?(\\d+)',0);
|
|
+------------------------------------------------------+
|
|
| regexp_extract('abcdef123ghi456jkl', '.*?(\\d+)', 0) |
|
|
+------------------------------------------------------+
|
|
| abcdef123ghi456 |
|
|
+------------------------------------------------------+
|
|
Returned 1 row(s) in 0.11s</codeblock>
|
|
<p>
|
|
This example shows how group 1 matches just the contents inside the first
|
|
<codeph>()</codeph> group in the pattern string:
|
|
</p>
|
|
<codeblock>[localhost:21000] > select regexp_extract('abcdef123ghi456jkl','.*?(\\d+)',1);
|
|
+------------------------------------------------------+
|
|
| regexp_extract('abcdef123ghi456jkl', '.*?(\\d+)', 1) |
|
|
+------------------------------------------------------+
|
|
| 456 |
|
|
+------------------------------------------------------+
|
|
Returned 1 row(s) in 0.11s</codeblock>
|
|
<p rev="2.0.0">
|
|
Unlike in earlier Impala releases, the regular expression library used in Impala 2.0
|
|
and later supports the <codeph>.*?</codeph> idiom for non-greedy matches. This
|
|
example shows how a pattern string starting with <codeph>.*?</codeph> matches the
|
|
shortest possible portion of the source string, returning the rightmost set of
|
|
lowercase letters. A pattern string both starting and ending with
|
|
<codeph>.*?</codeph> finds two potential matches of equal length, and returns the
|
|
first one found (the leftmost set of lowercase letters).
|
|
</p>
|
|
<codeblock>[localhost:21000] > select regexp_extract('AbcdBCdefGHI','.*?([[:lower:]]+)',1);
|
|
+--------------------------------------------------------+
|
|
| regexp_extract('abcdbcdefghi', '.*?([[:lower:]]+)', 1) |
|
|
+--------------------------------------------------------+
|
|
| def |
|
|
+--------------------------------------------------------+
|
|
[localhost:21000] > select regexp_extract('AbcdBCdefGHI','.*?([[:lower:]]+).*?',1);
|
|
+-----------------------------------------------------------+
|
|
| regexp_extract('abcdbcdefghi', '.*?([[:lower:]]+).*?', 1) |
|
|
+-----------------------------------------------------------+
|
|
| bcd |
|
|
+-----------------------------------------------------------+
|
|
</codeblock>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="2.3.0" id="regexp_like">
|
|
|
|
<dt>
|
|
REGEXP_LIKE(STRING source, STRING pattern[, STRING options])
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns <codeph>true</codeph> or <codeph>false</codeph> to indicate
|
|
whether the source string contains anywhere inside it the regular expression given by
|
|
the pattern. The optional third argument consists of letter flags that change how the
|
|
match is performed, such as <codeph>i</codeph> for case-insensitive matching.
|
|
<p
|
|
conref="../shared/impala_common.xml#common/syntax_blurb"/>
|
|
|
|
<p>
|
|
The flags that you can include in the optional third argument are:
|
|
</p>
|
|
<ul>
|
|
<li>
|
|
<codeph>c</codeph>: Case-sensitive matching (the default).
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>i</codeph>: Case-insensitive matching. If multiple instances of
|
|
<codeph>c</codeph> and <codeph>i</codeph> are included in the third argument, the
|
|
last such option takes precedence.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>m</codeph>: Multi-line matching. The <codeph>^</codeph> and
|
|
<codeph>$</codeph> operators match the start or end of any line within the source
|
|
string, not the start and end of the entire string.
|
|
</li>
|
|
|
|
<li>
|
|
<codeph>n</codeph>: Newline matching. The <codeph>.</codeph> operator can match
|
|
the newline character. A repetition operator such as <codeph>.*</codeph> can match
|
|
a portion of the source string that spans multiple lines.
|
|
</li>
|
|
</ul>
|
|
<p>
|
|
<b>Return type:</b> <codeph>BOOLEAN</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_re2"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_re2_warning"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_escapes"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
|
|
<p>
|
|
This example shows how <codeph>regexp_like()</codeph> can test for the existence of
|
|
various kinds of regular expression patterns within a source string:
|
|
</p>
|
|
<codeblock><![CDATA[
|
|
-- Matches because the 'f' appears somewhere in 'foo'.
|
|
select regexp_like('foo','f');
|
|
+-------------------------+
|
|
| regexp_like('foo', 'f') |
|
|
+-------------------------+
|
|
| true |
|
|
+-------------------------+
|
|
|
|
-- Does not match because the comparison is case-sensitive by default.
|
|
select regexp_like('foo','F');
|
|
+-------------------------+
|
|
| regexp_like('foo', 'f') |
|
|
+-------------------------+
|
|
| false |
|
|
+-------------------------+
|
|
|
|
-- The 3rd argument can change the matching logic, such as 'i' meaning case-insensitive.
|
|
select regexp_like('foo','F','i');
|
|
+------------------------------+
|
|
| regexp_like('foo', 'f', 'i') |
|
|
+------------------------------+
|
|
| true |
|
|
+------------------------------+
|
|
|
|
-- The familiar regular expression notations work, such as ^ and $ anchors...
|
|
select regexp_like('foo','f$');
|
|
+--------------------------+
|
|
| regexp_like('foo', 'f$') |
|
|
+--------------------------+
|
|
| false |
|
|
+--------------------------+
|
|
|
|
select regexp_like('foo','o$');
|
|
+--------------------------+
|
|
| regexp_like('foo', 'o$') |
|
|
+--------------------------+
|
|
| true |
|
|
+--------------------------+
|
|
|
|
-- ...and repetition operators such as * and +
|
|
select regexp_like('foooooobar','fo+b');
|
|
+-----------------------------------+
|
|
| regexp_like('foooooobar', 'fo+b') |
|
|
+-----------------------------------+
|
|
| true |
|
|
+-----------------------------------+
|
|
|
|
select regexp_like('foooooobar','fx*y*o*b');
|
|
+---------------------------------------+
|
|
| regexp_like('foooooobar', 'fx*y*o*b') |
|
|
+---------------------------------------+
|
|
| true |
|
|
+---------------------------------------+
|
|
]]>
|
|
</codeblock>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="regexp_replace">
|
|
|
|
<dt>
|
|
REGEXP_REPLACE(STRING initial, STRING pattern, STRING replacement)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the initial argument with the regular expression pattern
|
|
replaced by the final argument string.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_re2"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_re2_warning"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_escapes"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
|
|
<p>
|
|
These examples show how you can replace parts of a string matching a pattern with
|
|
replacement text, which can include backreferences to any <codeph>()</codeph> groups
|
|
in the pattern string. The backreference numbers start at 1, and any
|
|
<codeph>\</codeph> characters must be escaped as <codeph>\\</codeph>.
|
|
</p>
|
|
|
|
<p>
|
|
Replace a character pattern with new text:
|
|
</p>
|
|
<codeblock>[localhost:21000] > select regexp_replace('aaabbbaaa','b+','xyz');
|
|
+------------------------------------------+
|
|
| regexp_replace('aaabbbaaa', 'b+', 'xyz') |
|
|
+------------------------------------------+
|
|
| aaaxyzaaa |
|
|
+------------------------------------------+
|
|
Returned 1 row(s) in 0.11s</codeblock>
|
|
<p>
|
|
Replace a character pattern with substitution text that includes the original
|
|
matching text:
|
|
</p>
|
|
<codeblock>[localhost:21000] > select regexp_replace('aaabbbaaa','(b+)','<\\1>');
|
|
+----------------------------------------------+
|
|
| regexp_replace('aaabbbaaa', '(b+)', '<\\1>') |
|
|
+----------------------------------------------+
|
|
| aaa<bbb>aaa |
|
|
+----------------------------------------------+
|
|
Returned 1 row(s) in 0.11s</codeblock>
|
|
<p>
|
|
Remove all characters that are not digits:
|
|
</p>
|
|
<codeblock>[localhost:21000] > select regexp_replace('123-456-789','[^[:digit:]]','');
|
|
+---------------------------------------------------+
|
|
| regexp_replace('123-456-789', '[^[:digit:]]', '') |
|
|
+---------------------------------------------------+
|
|
| 123456789 |
|
|
+---------------------------------------------------+
|
|
Returned 1 row(s) in 0.12s</codeblock>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="repeat">
|
|
|
|
<dt>
|
|
REPEAT(STRING str, INT n)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the argument string repeated a specified number of times.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="replace" rev="2.9.0 IMPALA-4729">
|
|
|
|
<dt>
|
|
REPLACE(STRING initial, STRING target, STRING replacement)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the initial argument with all occurrences of the target string
|
|
replaced by the replacement string.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
|
|
|
|
<p>
|
|
Because this function does not use any regular expression patterns, it is typically
|
|
faster than <codeph>regexp_replace()</codeph> for simple string substitutions.
|
|
</p>
|
|
|
|
<p>
|
|
If any argument is <codeph>NULL</codeph>, the return value is <codeph>NULL</codeph>.
|
|
</p>
|
|
|
|
<p>
|
|
Matching is case-sensitive.
|
|
</p>
|
|
|
|
<p>
|
|
If the replacement string contains another instance of the target string, the
|
|
expansion is only performed once, instead of applying again to the newly constructed
|
|
string.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/added_in_290"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
<codeblock>-- Replace one string with another.
|
|
select replace('hello world','world','earth');
|
|
+------------------------------------------+
|
|
| replace('hello world', 'world', 'earth') |
|
|
+------------------------------------------+
|
|
| hello earth |
|
|
+------------------------------------------+
|
|
|
|
-- All occurrences of the target string are replaced.
|
|
select replace('hello world','o','0');
|
|
+----------------------------------+
|
|
| replace('hello world', 'o', '0') |
|
|
+----------------------------------+
|
|
| hell0 w0rld |
|
|
+----------------------------------+
|
|
|
|
-- If no match is found, the original string is returned unchanged.
|
|
select replace('hello world','xyz','abc');
|
|
+--------------------------------------+
|
|
| replace('hello world', 'xyz', 'abc') |
|
|
+--------------------------------------+
|
|
| hello world |
|
|
+--------------------------------------+
|
|
</codeblock>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="reverse">
|
|
|
|
<dt>
|
|
REVERSE(STRING a)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the argument string with characters in reversed order.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="right">
|
|
|
|
<dt>
|
|
RIGHT(STRING a, INT num_chars)
|
|
</dt>
|
|
|
|
<dd>
|
|
See the <codeph>STRRIGHT()</codeph> function.
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="rpad">
|
|
|
|
<dt>
|
|
RPAD(STRING str, INT len, STRING pad)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns a string of a specified length, based on the first argument
|
|
string. If the specified string is too short, it is padded on the right with a
|
|
repeating sequence of the characters from the pad string. If the specified string is
|
|
too long, it is truncated on the right.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="rtrim">
|
|
|
|
<dt>
|
|
RTRIM(STRING a [, STRING chars_to_trim])
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the argument string with all occurrences of characters
|
|
specified by the second argument removed from the right side. Removes spaces if the
|
|
second argument is not specified.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="space">
|
|
|
|
<dt>
|
|
SPACE(INT n)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns a concatenated string of the specified number of spaces.
|
|
Shorthand for <codeph>REPEAT(' ',<varname>n</varname>)</codeph>.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry rev="2.3.0 IMPALA-2084" id="split_part">
|
|
|
|
<dt>
|
|
SPLIT_PART(STRING source, STRING delimiter, BIGINT n)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the nth field within a delimited string. The fields are
|
|
numbered starting from 1. The delimiter can consist of multiple characters, not just a
|
|
single character. All matching of the delimiter is done exactly, not using any regular
|
|
expression patterns.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_re2"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_re2_warning"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/regexp_escapes"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/example_blurb"/>
|
|
|
|
<p>
|
|
These examples show how to retrieve the nth field from a delimited string:
|
|
</p>
|
|
<codeblock><![CDATA[
|
|
select split_part('x,y,z',',',1);
|
|
+-----------------------------+
|
|
| split_part('x,y,z', ',', 1) |
|
|
+-----------------------------+
|
|
| x |
|
|
+-----------------------------+
|
|
|
|
select split_part('x,y,z',',',2);
|
|
+-----------------------------+
|
|
| split_part('x,y,z', ',', 2) |
|
|
+-----------------------------+
|
|
| y |
|
|
+-----------------------------+
|
|
|
|
select split_part('x,y,z',',',3);
|
|
+-----------------------------+
|
|
| split_part('x,y,z', ',', 3) |
|
|
+-----------------------------+
|
|
| z |
|
|
+-----------------------------+
|
|
]]>
|
|
</codeblock>
|
|
<p>
|
|
These examples show what happens for out-of-range field positions. Specifying a
|
|
value less than 1 produces an error. Specifying a value greater than the number of
|
|
fields returns a zero-length string (which is not the same as
|
|
<codeph>NULL</codeph>).
|
|
</p>
|
|
<codeblock><![CDATA[
|
|
select split_part('x,y,z',',',0);
|
|
ERROR: Invalid field position: 0
|
|
|
|
with t1 as (select split_part('x,y,z',',',4) nonexistent_field)
|
|
select
|
|
nonexistent_field
|
|
, concat('[',nonexistent_field,']')
|
|
, length(nonexistent_field);
|
|
from t1
|
|
+-------------------+-------------------------------------+---------------------------+
|
|
| nonexistent_field | concat('[', nonexistent_field, ']') | length(nonexistent_field) |
|
|
+-------------------+-------------------------------------+---------------------------+
|
|
| | [] | 0 |
|
|
+-------------------+-------------------------------------+---------------------------+
|
|
]]>
|
|
</codeblock>
|
|
<p>
|
|
These examples show how the delimiter can be a multi-character value:
|
|
</p>
|
|
<codeblock><![CDATA[
|
|
select split_part('one***two***three','***',2);
|
|
+-------------------------------------------+
|
|
| split_part('one***two***three', '***', 2) |
|
|
+-------------------------------------------+
|
|
| two |
|
|
+-------------------------------------------+
|
|
|
|
select split_part('one\|/two\|/three','\|/',3);
|
|
+-------------------------------------------+
|
|
| split_part('one\|/two\|/three', '\|/', 3) |
|
|
+-------------------------------------------+
|
|
| three |
|
|
+-------------------------------------------+
|
|
]]>
|
|
</codeblock>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="strleft">
|
|
|
|
<dt>
|
|
STRLEFT(STRING a, INT num_chars)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the leftmost characters of the string. Shorthand for a call to
|
|
<codeph>SUBSTR()</codeph> with 2 arguments.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="strright">
|
|
|
|
<dt>
|
|
STRRIGHT(STRING a, INT num_chars)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the rightmost characters of the string. Shorthand for a call
|
|
to <codeph>SUBSTR()</codeph> with 2 arguments.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="substr">
|
|
|
|
<dt>
|
|
SUBSTR(STRING a, INT start [, INT len]),
|
|
<ph id="substring"
|
|
>SUBSTRING(STRING a, INT start [, INT len])</ph>
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the portion of the string starting at a specified point,
|
|
optionally with a specified maximum length. The characters in the string are indexed
|
|
starting at 1.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="translate">
|
|
|
|
<dt>
|
|
TRANSLATE(STRING input, STRING from, STRING to)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the <codeph>input</codeph> string with each character in the
|
|
<codeph>from</codeph> argument replaced with the corresponding character in the
|
|
<codeph>to</codeph> argument. The characters are matched in the order they appear in
|
|
<codeph>from</codeph> and <codeph>to</codeph>.
|
|
<p>
|
|
For example: <codeph>translate ('hello world','world','earth')</codeph> returns
|
|
<codeph>'hetta earth'</codeph>.
|
|
</p>
|
|
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p>
|
|
<b>Usage notes:</b>
|
|
</p>
|
|
|
|
<p>
|
|
If <codeph>from</codeph> contains more characters than <codeph>to</codeph>, the
|
|
<codeph>from</codeph> characters that are beyond the length of <codeph>to</codeph>
|
|
are removed in the result.
|
|
</p>
|
|
|
|
<p>
|
|
For example:
|
|
</p>
|
|
|
|
<p>
|
|
<codeph>translate('abcdedg', 'bcd', '1')</codeph> returns <codeph>'a1eg'</codeph>.
|
|
</p>
|
|
|
|
<p>
|
|
<codeph>translate('Unit Number#2', '# ', '_')</codeph> returns
|
|
<codeph>'UnitNumber_2'</codeph>.
|
|
</p>
|
|
|
|
<p>
|
|
If <codeph>from</codeph> is <codeph>NULL</codeph>, the function returns
|
|
<codeph>NULL</codeph>.
|
|
</p>
|
|
|
|
<p>
|
|
If <codeph>to</codeph> contains more characters than <codeph>from</codeph>, the
|
|
extra characters in <codeph>to</codeph> are ignored.
|
|
</p>
|
|
|
|
<p>
|
|
If <codeph>from</codeph> contains duplicate characters, the duplicate character is
|
|
replaced with the first matching character in <codeph>to</codeph>.
|
|
</p>
|
|
|
|
<p>
|
|
For example: <codeph>translate ('hello','ll','67')</codeph> returns
|
|
<codeph>'he66o'</codeph>.
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="trim">
|
|
|
|
<dt>
|
|
TRIM(STRING a)
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the input string with both leading and trailing spaces
|
|
removed. The same as passing the string through both <codeph>LTRIM()</codeph> and
|
|
<codeph>RTRIM()</codeph>.
|
|
<p>
|
|
<b>Usage notes:</b> Often used during data cleansing operations during the ETL
|
|
cycle, if input values might still have surrounding spaces. For a more
|
|
general-purpose function that can remove other leading and trailing characters
|
|
besides spaces, see <codeph>BTRIM()</codeph>.
|
|
</p>
|
|
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
|
|
<dlentry id="upper">
|
|
|
|
<dt>
|
|
UPPER(STRING a), <ph id="ucase">UCASE(STRING a)</ph>
|
|
</dt>
|
|
|
|
<dd>
|
|
<b>Purpose:</b> Returns the argument string converted to all-uppercase.
|
|
<p>
|
|
<b>Return type:</b> <codeph>STRING</codeph>
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
|
|
|
|
<p
|
|
conref="../shared/impala_common.xml#common/case_insensitive_comparisons_tip"
|
|
/>
|
|
</dd>
|
|
|
|
</dlentry>
|
|
</dl>
|
|
|
|
</conbody>
|
|
|
|
</concept>
|