mirror of
https://github.com/apache/impala.git
synced 2025-12-19 09:58:28 -05:00
The recent documentation formatting changes introduced the navigation panel on the left. However, due to the length of the query options navigation title these could overlap with the documentation paragraphs. This commit removes the underscores from the navigation titles of the query options, so browsers can break them into multiple lines. Additionally, the "SET" and "Query Options for the SET Statement" pages are merged to save some more space for the query option navigation titles. Testing: - Built the documentation and tested manually Change-Id: Icec787d7a2af848aaaff65be2ecf311a5ce8fe7f Reviewed-on: http://gerrit.cloudera.org:8080/20556 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Jason Fehr <jfehr@cloudera.com> Reviewed-by: Peter Rozsa <prozsa@cloudera.com> Reviewed-by: Tamas Mate <tmater@apache.org>
110 lines
4.0 KiB
XML
110 lines
4.0 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!--
|
|
Licensed to the Apache Software Foundation (ASF) under one
|
|
or more contributor license agreements. See the NOTICE file
|
|
distributed with this work for additional information
|
|
regarding copyright ownership. The ASF licenses this file
|
|
to you under the Apache License, Version 2.0 (the
|
|
"License"); you may not use this file except in compliance
|
|
with the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing,
|
|
software distributed under the License is distributed on an
|
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
KIND, either express or implied. See the License for the
|
|
specific language governing permissions and limitations
|
|
under the License.
|
|
-->
|
|
<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
|
|
<concept rev="parquet_block_size" id="parquet_file_size">
|
|
|
|
<title>PARQUET_FILE_SIZE Query Option</title>
|
|
<titlealts audience="PDF"><navtitle>PARQUET FILE SIZE</navtitle></titlealts>
|
|
<prolog>
|
|
<metadata>
|
|
<data name="Category" value="Impala"/>
|
|
<data name="Category" value="Parquet"/>
|
|
<data name="Category" value="ETL"/>
|
|
<data name="Category" value="File Formats"/>
|
|
<data name="Category" value="Impala Query Options"/>
|
|
<data name="Category" value="Developers"/>
|
|
<data name="Category" value="Data Analysts"/>
|
|
</metadata>
|
|
</prolog>
|
|
|
|
<conbody>
|
|
|
|
<p>
|
|
<indexterm audience="hidden">PARQUET_FILE_SIZE query option</indexterm>
|
|
Specifies the maximum size of each Parquet data file produced by Impala <codeph>INSERT</codeph> statements.
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/syntax_blurb"/>
|
|
|
|
<p>
|
|
Specify the size in bytes, or with a trailing <codeph>m</codeph> or <codeph>g</codeph> character to indicate
|
|
megabytes or gigabytes. For example:
|
|
</p>
|
|
|
|
<codeblock>-- 128 megabytes.
|
|
set PARQUET_FILE_SIZE=134217728;
|
|
INSERT OVERWRITE parquet_table SELECT * FROM text_table;
|
|
|
|
-- 512 megabytes.
|
|
set PARQUET_FILE_SIZE=512m;
|
|
INSERT OVERWRITE parquet_table SELECT * FROM text_table;
|
|
|
|
-- 1 gigabyte.
|
|
set PARQUET_FILE_SIZE=1g;
|
|
INSERT OVERWRITE parquet_table SELECT * FROM text_table;
|
|
</codeblock>
|
|
|
|
<p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
|
|
|
|
<p>
|
|
With tables that are small or finely partitioned, the default Parquet block size (formerly 1 GB, now 256 MB
|
|
in Impala 2.0 and later) could be much larger than needed for each data file. For <codeph>INSERT</codeph>
|
|
operations into such tables, you can increase parallelism by specifying a smaller
|
|
<codeph>PARQUET_FILE_SIZE</codeph> value, resulting in more HDFS blocks that can be processed by different
|
|
nodes.
|
|
<!-- Reducing the file size also reduces the memory required to buffer each block before writing it to disk. -->
|
|
</p>
|
|
|
|
<p>
|
|
<b>Type:</b> numeric, with optional unit specifier
|
|
</p>
|
|
|
|
<note type="important">
|
|
<p>
|
|
Currently, the maximum value for this setting is 1 gigabyte (<codeph>1g</codeph>).
|
|
Setting a value higher than 1 gigabyte could result in errors during
|
|
an <codeph>INSERT</codeph> operation.
|
|
</p>
|
|
</note>
|
|
|
|
<p>
|
|
<b>Default:</b> 0 (produces files with a target size of 256 MB; files might be larger for very wide tables)
|
|
</p>
|
|
|
|
<p conref="../shared/impala_common.xml#common/adls_block_splitting"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/isilon_blurb"/>
|
|
<p conref="../shared/impala_common.xml#common/isilon_block_size_caveat"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/ozone_blurb"/>
|
|
<p conref="../shared/impala_common.xml#common/ozone_block_size_caveat"/>
|
|
|
|
<p conref="../shared/impala_common.xml#common/related_info"/>
|
|
|
|
<p>
|
|
For information about the Parquet file format, and how the number and size of data files affect query
|
|
performance, see <xref href="impala_parquet.xml#parquet"/>.
|
|
</p>
|
|
|
|
<!-- Examples actually folded into Syntax earlier. <p conref="../shared/impala_common.xml#common/example_blurb"/> -->
|
|
|
|
</conbody>
|
|
</concept>
|