mirror of
https://github.com/apache/impala.git
synced 2025-12-30 03:01:44 -05:00
For this change to land in master, the audience="hidden" code review needs to be completed first. Otherwise, the doc build would still work but the audience="hidden" content would be visible rather than hidden as desired. Some work happening in parallel might introduce additional instances of audience="Cloudera". I suggest addressing those in a followup CR so this global change can land quickly. Since the changes apply across so many different files, but are so narrow in scope, I suggest that the way to validate (check that no extraneous changes were introduced accidentally) is to diff just the changed lines: git diff -U0 HEAD^ HEAD In patch set 2, I updated other topics marked audience="Cloudera" by CRs that were pushed in the meantime. Change-Id: Ic93d89da77e1f51bbf548a522d98d0c4e2fb31c8 Reviewed-on: http://gerrit.cloudera.org:8080/5613 Reviewed-by: John Russell <jrussell@cloudera.com> Tested-by: Impala Public Jenkins
70 lines
2.9 KiB
XML
70 lines
2.9 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!--
|
|
Licensed to the Apache Software Foundation (ASF) under one
|
|
or more contributor license agreements. See the NOTICE file
|
|
distributed with this work for additional information
|
|
regarding copyright ownership. The ASF licenses this file
|
|
to you under the Apache License, Version 2.0 (the
|
|
"License"); you may not use this file except in compliance
|
|
with the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing,
|
|
software distributed under the License is distributed on an
|
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
KIND, either express or implied. See the License for the
|
|
specific language governing permissions and limitations
|
|
under the License.
|
|
-->
|
|
<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
|
|
<concept id="parquet_annotate_strings_utf8" rev="2.6.0 IMPALA-2069">
|
|
|
|
<title>PARQUET_ANNOTATE_STRINGS_UTF8 Query Option (<keyword keyref="impala26"/> or higher only)</title>
|
|
<titlealts audience="PDF"><navtitle>PARQUET_ANNOTATE_STRINGS_UTF8</navtitle></titlealts>
|
|
<prolog>
|
|
<metadata>
|
|
<data name="Category" value="Impala"/>
|
|
<data name="Category" value="Impala Query Options"/>
|
|
<data name="Category" value="Parquet"/>
|
|
<data name="Category" value="Developers"/>
|
|
<data name="Category" value="Data Analysts"/>
|
|
</metadata>
|
|
</prolog>
|
|
|
|
<conbody>
|
|
|
|
<p rev="2.6.0 IMPALA-2069">
|
|
<indexterm audience="hidden">PARQUET_ANNOTATE_STRINGS_UTF8 query option</indexterm>
|
|
Causes Impala <codeph>INSERT</codeph> and <codeph>CREATE TABLE AS SELECT</codeph> statements
|
|
to write Parquet files that use the UTF-8 annotation for <codeph>STRING</codeph> columns.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
|
|
<p>
|
|
By default, Impala represents a <codeph>STRING</codeph> column in Parquet as an unannotated binary field.
|
|
</p>
|
|
<p>
|
|
Impala always uses the UTF-8 annotation when writing <codeph>CHAR</codeph> and <codeph>VARCHAR</codeph>
|
|
columns to Parquet files. As an alternative to setting this query option, you can cast <codeph>STRING</codeph>
|
|
values to <codeph>VARCHAR</codeph>.
|
|
</p>
|
|
<p>
|
|
This option helps make Impala-written data more interoperable with other data processing engines.
|
|
Impala itself currently does not support all operations on UTF-8 data.
|
|
Although data processed by Impala is typically represented in ASCII, it is valid to designate the
|
|
data as UTF-8 when storing on disk, because ASCII is a subset of UTF-8.
|
|
</p>
|
|
<p conref="../shared/impala_common.xml#common/type_boolean"/>
|
|
<p conref="../shared/impala_common.xml#common/default_false_0"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/added_in_260"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/related_info"/>
|
|
<p>
|
|
<xref href="impala_parquet.xml#parquet"/>
|
|
</p>
|
|
|
|
</conbody>
|
|
</concept>
|