mirror of
https://github.com/apache/impala.git
synced 2025-12-30 03:01:44 -05:00
This fixes three issues for functional dataset loading: - works around HIVE-21675, a bug in which 'CREATE VIEW IF NOT EXISTS' does not function correctly in our current Hive build. This has been fixed already, but the workaround is pretty simple, and actually the 'drop and recreate' pattern is used more widely for data-loading than the 'create if not exists' one. - Moves the creation of the 'hive_index' table from load-dependent-tables.sql to a new load-dependent-tables-hive2.sql file which is only executed on Hive 2. - Moving from MR to Tez execution changed the behavior of data loading by disabling the auto-merging of small files. With Hive-on-MR, this behavior defaulted to true, but with Hive-on-Tez it defaults false. The change is likely motivated by the fact that Tez automatically groups small splits on the _input_ side and thus is less likely to produce lots of small files. However, that grouping functionality doesn't work properly in localhost clusters (TEZ-3310) so we aren't seeing the benefit. So, this patch enables the post-process merging of small files. Prior to this change, the 'alltypesaggmultifilesnopart' test table was getting 40+ files inside it, which broke various planner tests. With the change, it gets the expected 4 files. Change-Id: Ic34930dc064da3136dde4e01a011d14db6a74ecd Reviewed-on: http://gerrit.cloudera.org:8080/13251 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
111 lines
4.4 KiB
SQL
111 lines
4.4 KiB
SQL
-- Licensed to the Apache Software Foundation (ASF) under one
|
|
-- or more contributor license agreements. See the NOTICE file
|
|
-- distributed with this work for additional information
|
|
-- regarding copyright ownership. The ASF licenses this file
|
|
-- to you under the Apache License, Version 2.0 (the
|
|
-- "License"); you may not use this file except in compliance
|
|
-- with the License. You may obtain a copy of the License at
|
|
--
|
|
-- http://www.apache.org/licenses/LICENSE-2.0
|
|
--
|
|
-- Unless required by applicable law or agreed to in writing,
|
|
-- software distributed under the License is distributed on an
|
|
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
-- KIND, either express or implied. See the License for the
|
|
-- specific language governing permissions and limitations
|
|
-- under the License.
|
|
|
|
-- Create and load tables that depend upon data in the hive test-warehouse already existing
|
|
|
|
-- Load a mixed-format table. Hive behaves oddly when mixing formats,
|
|
-- but the following incantation ensures that the result is a
|
|
-- three-partition table. First is text format, second is sequence
|
|
-- file, third is RC file. Must be called after test-warehouse is
|
|
-- successfully populated
|
|
USE functional;
|
|
DROP TABLE IF EXISTS alltypesmixedformat;
|
|
CREATE EXTERNAL TABLE alltypesmixedformat (
|
|
id int,
|
|
bool_col boolean,
|
|
tinyint_col tinyint,
|
|
smallint_col smallint,
|
|
int_col int,
|
|
bigint_col bigint,
|
|
float_col float,
|
|
double_col double,
|
|
date_string_col string,
|
|
string_col string,
|
|
timestamp_col timestamp)
|
|
partitioned by (year int, month int)
|
|
row format delimited fields terminated by ',' escaped by '\\'
|
|
stored as TEXTFILE
|
|
LOCATION '/test-warehouse/alltypesmixedformat';
|
|
|
|
INSERT OVERWRITE TABLE alltypesmixedformat PARTITION (year=2009, month=1)
|
|
SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
|
float_col, double_col, date_string_col, string_col, timestamp_col
|
|
FROM alltypes
|
|
WHERE year=2009 and month=1;
|
|
|
|
ALTER TABLE alltypesmixedformat SET FILEFORMAT SEQUENCEFILE;
|
|
LOAD DATA INPATH '/tmp/alltypes_seq/year=2009/month=2/'
|
|
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=2);
|
|
|
|
ALTER TABLE alltypesmixedformat SET FILEFORMAT RCFILE;
|
|
LOAD DATA INPATH '/tmp/alltypes_rc/year=2009/month=3/'
|
|
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=3);
|
|
|
|
ALTER TABLE alltypesmixedformat SET FILEFORMAT PARQUET;
|
|
LOAD DATA INPATH '/tmp/alltypes_parquet/year=2009/month=4'
|
|
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=4);
|
|
|
|
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1)
|
|
SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
|
|
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1)
|
|
SET FILEFORMAT TEXTFILE;
|
|
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
|
|
SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
|
|
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
|
|
SET FILEFORMAT SEQUENCEFILE;
|
|
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=3)
|
|
SET FILEFORMAT RCFILE;
|
|
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=4)
|
|
SET FILEFORMAT PARQUET;
|
|
|
|
DROP TABLE IF EXISTS functional_parquet.chars_formats;
|
|
CREATE EXTERNAL TABLE functional_parquet.chars_formats
|
|
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
|
|
STORED AS PARQUET
|
|
LOCATION '/test-warehouse/chars_formats_parquet';
|
|
|
|
DROP TABLE IF EXISTS functional_orc_def.chars_formats;
|
|
CREATE EXTERNAL TABLE functional_orc_def.chars_formats
|
|
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
|
|
STORED AS ORC
|
|
LOCATION '/test-warehouse/chars_formats_orc_def';
|
|
|
|
DROP TABLE IF EXISTS functional.chars_formats;
|
|
CREATE EXTERNAL TABLE functional.chars_formats
|
|
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
|
|
ROW FORMAT delimited fields terminated by ',' escaped by '\\'
|
|
STORED AS TEXTFILE
|
|
LOCATION '/test-warehouse/chars_formats_text';
|
|
|
|
DROP TABLE IF EXISTS functional_avro_snap.chars_formats;
|
|
CREATE EXTERNAL TABLE functional_avro_snap.chars_formats
|
|
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
|
|
STORED AS AVRO
|
|
LOCATION '/test-warehouse/chars_formats_avro_snap'
|
|
TBLPROPERTIES ('avro.schema.literal'='{"type":"record",
|
|
"name":"CharTypesTest","doc":"Schema generated by Kite",
|
|
"fields":[
|
|
{"name":"cs","type":["null","string"], "doc":"Type inferred"},
|
|
{"name":"cl","type":["null","string"], "doc":"Type inferred"},
|
|
{"name":"vc","type":["null","string"], "doc":"Type inferred"}
|
|
]}');
|
|
|
|
---- Unsupported Impala table types
|
|
USE functional;
|
|
DROP VIEW IF EXISTS hive_view;
|
|
CREATE VIEW hive_view AS SELECT 1 AS int_col FROM alltypes limit 1;
|