mirror of
https://github.com/apache/impala.git
synced 2025-12-26 14:02:53 -05:00
There are some failures in loading test data on Apache Hive 3.1.3:
- STORED AS JSONFILE is not supported
- STORED BY ICEBERG is not supported. Similarly, STORED BY ICEBERG
STORED AS AVRO is not supported.
- Missing the jar of iceberg-hive-runtime in CLASSPATH of HMS and Tez
jobs.
- Creating a table in Impala is not translated to an EXTERNAL table in HMS
- Hive INSERT on insert-only tables fails to generate InsertEvents
(HIVE-20067).
This patch fixes the syntax issues by using old syntax of Apache Hive
3.1.3:
- Convert STORED AS JSONFILE to ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.JsonSerDe'
- Convert STORED BY ICEBERG to STORED BY
'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
- Convert STORED BY ICEBERG STORED AS AVRO to the above one with
tblproperties('write.format.default'='avro')
Most of the conversions are done in generate-schema-statements.py. One
exception is testdata/bin/load-dependent-tables.sql, for which we need to
generate a new file with the conversions applied before using it.
The missing jar of iceberg-hive-runtime is added into HIVE_AUX_JARS_PATH
in bin/impala-config.sh. Note that this is only needed by Apache Hive3
since CDP Hive3 has the jar of hive-iceberg-handler in its lib folder.
To fix the failure of InsertEvents, we add the patch of HIVE-20067 and
modify testdata/bin/patch_hive.sh to also recompile the submodule
standalone-metastore.
Modified some statements in
testdata/datasets/functional/functional_schema_template.sql to be more
reliable on retry.
Tests
- Verified the testdata can be loaded in ubuntu-20.04-from-scratch
Change-Id: I8f52c91602da8822b0f46f19dc4111c7187ce400
Reviewed-on: http://gerrit.cloudera.org:8080/21657
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
117 lines
4.6 KiB
SQL
117 lines
4.6 KiB
SQL
-- Licensed to the Apache Software Foundation (ASF) under one
|
|
-- or more contributor license agreements. See the NOTICE file
|
|
-- distributed with this work for additional information
|
|
-- regarding copyright ownership. The ASF licenses this file
|
|
-- to you under the Apache License, Version 2.0 (the
|
|
-- "License"); you may not use this file except in compliance
|
|
-- with the License. You may obtain a copy of the License at
|
|
--
|
|
-- http://www.apache.org/licenses/LICENSE-2.0
|
|
--
|
|
-- Unless required by applicable law or agreed to in writing,
|
|
-- software distributed under the License is distributed on an
|
|
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
-- KIND, either express or implied. See the License for the
|
|
-- specific language governing permissions and limitations
|
|
-- under the License.
|
|
|
|
-- Create and load tables that depend upon data in the hive test-warehouse already existing
|
|
|
|
-- Load a mixed-format table. Hive behaves oddly when mixing formats,
-- but the following incantation ensures that the result is a
-- four-partition table. First is text format, second is sequence
-- file, third is RC file, fourth is Parquet. Must be called after
-- test-warehouse is successfully populated.
|
|
-- Unqualified table names below resolve against the functional database.
USE functional;

-- Drop any earlier copy before recreating the table.
DROP TABLE IF EXISTS alltypesmixedformat;

-- The table starts out as comma-delimited text; the table-level file format
-- is changed by later ALTER TABLE statements in this script.
CREATE EXTERNAL TABLE alltypesmixedformat (
    id              INT,
    bool_col        BOOLEAN,
    tinyint_col     TINYINT,
    smallint_col    SMALLINT,
    int_col         INT,
    bigint_col      BIGINT,
    float_col       FLOAT,
    double_col      DOUBLE,
    date_string_col STRING,
    string_col      STRING,
    timestamp_col   TIMESTAMP
)
PARTITIONED BY (year INT, month INT)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    ESCAPED BY '\\'
STORED AS TEXTFILE
LOCATION '/test-warehouse/alltypesmixedformat';
|
|
|
|
-- Fill partition (year=2009, month=1) with the matching rows of alltypes.
-- The partition columns come from the PARTITION clause, so only the eleven
-- data columns are selected.
INSERT OVERWRITE TABLE alltypesmixedformat PARTITION (year = 2009, month = 1)
SELECT
    id,
    bool_col,
    tinyint_col,
    smallint_col,
    int_col,
    bigint_col,
    float_col,
    double_col,
    date_string_col,
    string_col,
    timestamp_col
FROM alltypes
WHERE year = 2009
  AND month = 1;
|
|
|
|
-- For each remaining partition: first change the table-level file format,
-- then load the files found under the given path into that partition.
-- The order of these statements is significant; do not reorder them.

-- month=2: sequence file
ALTER TABLE alltypesmixedformat SET FILEFORMAT SEQUENCEFILE;
LOAD DATA INPATH '/tmp/alltypes_seq/year=2009/month=2/'
    OVERWRITE INTO TABLE alltypesmixedformat
    PARTITION (year = 2009, month = 2);

-- month=3: RC file
ALTER TABLE alltypesmixedformat SET FILEFORMAT RCFILE;
LOAD DATA INPATH '/tmp/alltypes_rc/year=2009/month=3/'
    OVERWRITE INTO TABLE alltypesmixedformat
    PARTITION (year = 2009, month = 3);

-- month=4: Parquet
ALTER TABLE alltypesmixedformat SET FILEFORMAT PARQUET;
LOAD DATA INPATH '/tmp/alltypes_parquet/year=2009/month=4'
    OVERWRITE INTO TABLE alltypesmixedformat
    PARTITION (year = 2009, month = 4);
|
|
|
|
-- Set the file format on every partition explicitly. Months 1 and 2 also
-- get the field/escape delimiter serde properties set before their format.

-- month=1: text
ALTER TABLE alltypesmixedformat
    PARTITION (year = 2009, month = 1)
    SET SERDEPROPERTIES ('field.delim' = ',', 'escape.delim' = '\\');
ALTER TABLE alltypesmixedformat
    PARTITION (year = 2009, month = 1)
    SET FILEFORMAT TEXTFILE;

-- month=2: sequence file
ALTER TABLE alltypesmixedformat
    PARTITION (year = 2009, month = 2)
    SET SERDEPROPERTIES ('field.delim' = ',', 'escape.delim' = '\\');
ALTER TABLE alltypesmixedformat
    PARTITION (year = 2009, month = 2)
    SET FILEFORMAT SEQUENCEFILE;

-- month=3: RC file
ALTER TABLE alltypesmixedformat
    PARTITION (year = 2009, month = 3)
    SET FILEFORMAT RCFILE;

-- month=4: Parquet
ALTER TABLE alltypesmixedformat
    PARTITION (year = 2009, month = 4)
    SET FILEFORMAT PARQUET;
|
|
|
|
-- Parquet variant of chars_formats: two CHAR columns and one VARCHAR column
-- over the files at the given warehouse location.
DROP TABLE IF EXISTS functional_parquet.chars_formats;

CREATE EXTERNAL TABLE functional_parquet.chars_formats (
    cs CHAR(5),
    cl CHAR(140),
    vc VARCHAR(32)
)
STORED AS PARQUET
LOCATION '/test-warehouse/chars_formats_parquet';
|
|
|
|
-- ORC variant of chars_formats, with the same three columns as the other
-- chars_formats tables in this script.
DROP TABLE IF EXISTS functional_orc_def.chars_formats;

CREATE EXTERNAL TABLE functional_orc_def.chars_formats (
    cs CHAR(5),
    cl CHAR(140),
    vc VARCHAR(32)
)
STORED AS ORC
LOCATION '/test-warehouse/chars_formats_orc_def';
|
|
|
|
-- Text variant of chars_formats: comma-delimited fields, backslash escape.
DROP TABLE IF EXISTS functional.chars_formats;

CREATE EXTERNAL TABLE functional.chars_formats (
    cs CHAR(5),
    cl CHAR(140),
    vc VARCHAR(32)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    ESCAPED BY '\\'
STORED AS TEXTFILE
LOCATION '/test-warehouse/chars_formats_text';
|
|
|
|
-- JSON variant of chars_formats.
-- NOTE(review): Apache Hive 3.1.3 reportedly rejects STORED AS JSONFILE, and
-- the loader is said to rewrite that clause to
-- ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe' when generating
-- the file it actually runs. Keep the clause text exactly as written so any
-- such textual conversion still matches -- confirm against the loader script.
DROP TABLE IF EXISTS functional_json.chars_formats;

CREATE EXTERNAL TABLE functional_json.chars_formats (
    cs CHAR(5),
    cl CHAR(140),
    vc VARCHAR(32)
)
STORED AS JSONFILE
LOCATION '/test-warehouse/chars_formats_json';
|
|
|
|
-- Avro variant of chars_formats. The Avro schema is supplied inline through
-- 'avro.schema.literal'; it declares cs, cl and vc as nullable strings.
-- The literal spans several lines, so its text is left unindented and no
-- comments may be placed inside it.
DROP TABLE IF EXISTS functional_avro_snap.chars_formats;

CREATE EXTERNAL TABLE functional_avro_snap.chars_formats (
    cs CHAR(5),
    cl CHAR(140),
    vc VARCHAR(32)
)
STORED AS AVRO
LOCATION '/test-warehouse/chars_formats_avro_snap'
TBLPROPERTIES ('avro.schema.literal'='{"type":"record",
"name":"CharTypesTest","doc":"Schema generated by Kite",
"fields":[
{"name":"cs","type":["null","string"], "doc":"Type inferred"},
{"name":"cl","type":["null","string"], "doc":"Type inferred"},
{"name":"vc","type":["null","string"], "doc":"Type inferred"}
]}');
|
|
|
|
---- Unsupported Impala table types
|
|
-- Recreate hive_view in the functional database: a view over alltypes that
-- yields the constant 1 as int_col, limited to a single row.
USE functional;

DROP VIEW IF EXISTS hive_view;

CREATE VIEW hive_view AS
SELECT 1 AS int_col
FROM alltypes
LIMIT 1;
|