Files
impala/testdata/bin/load-dependent-tables.sql
stiga-huang d2e495e83a IMPALA-13284: Loading test data on Apache Hive3
There are some failures in loading test data on Apache Hive 3.1.3:
 - STORED AS JSONFILE is not supported
 - STORED BY ICEBERG is not supported. Similarly, STORED BY ICEBERG
   STORED AS AVRO is not supported.
 - Missing the jar of iceberg-hive-runtime in CLASSPATH of HMS and Tez
   jobs.
 - Creating table in Impala is not translated to EXTERNAL table in HMS
 - Hive INSERT on insert-only tables failed in generating InsertEvents
   (HIVE-20067).

This patch fixes the syntax issues by using old syntax of Apache Hive
3.1.3:
 - Convert STORED AS JSONFILE to ROW FORMAT SERDE
   'org.apache.hadoop.hive.serde2.JsonSerDe'
 - Convert STORED BY ICEBERG to STORED BY
   'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
 - Convert STORED BY ICEBERG STORED AS AVRO to the above one with
   tblproperties('write.format.default'='avro')
Most of the conversion are done in generate-schema-statements.py. One
exception is in testdata/bin/load-dependent-tables.sql where we need to
generate a new file with the conversion when using it.

The missing jar of iceberg-hive-runtime is added into HIVE_AUX_JARS_PATH
in bin/impala-config.sh. Note that this is only needed by Apache Hive3
since CDP Hive3 has the jar of hive-iceberg-handler in its lib folder.

To fix the failure of InsertEvents, we add the patch of HIVE-20067 and
modify testdata/bin/patch_hive.sh to also recompile the submodule
standalone-metastore.

Modified some statements in
testdata/datasets/functional/functional_schema_template.sql to be more
reliable in retry.

Tests
 - Verified the testdata can be loaded in ubuntu-20.04-from-scratch

Change-Id: I8f52c91602da8822b0f46f19dc4111c7187ce400
Reviewed-on: http://gerrit.cloudera.org:8080/21657
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2024-08-20 07:01:21 +00:00

117 lines
4.6 KiB
SQL

-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-- Create and load tables that depend upon data in the hive test-warehouse already existing
-- Load a mixed-format table. Hive behaves oddly when mixing formats,
-- but the following incantation ensures that the result is a
-- three-partition table. First is text format, second is sequence
-- file, third is RC file. Must be called after test-warehouse is
-- successfully populated
USE functional;
DROP TABLE IF EXISTS alltypesmixedformat;
CREATE EXTERNAL TABLE alltypesmixedformat (
id int,
bool_col boolean,
tinyint_col tinyint,
smallint_col smallint,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
date_string_col string,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int)
row format delimited fields terminated by ',' escaped by '\\'
stored as TEXTFILE
LOCATION '/test-warehouse/alltypesmixedformat';
INSERT OVERWRITE TABLE alltypesmixedformat PARTITION (year=2009, month=1)
SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
FROM alltypes
WHERE year=2009 and month=1;
ALTER TABLE alltypesmixedformat SET FILEFORMAT SEQUENCEFILE;
LOAD DATA INPATH '/tmp/alltypes_seq/year=2009/month=2/'
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=2);
ALTER TABLE alltypesmixedformat SET FILEFORMAT RCFILE;
LOAD DATA INPATH '/tmp/alltypes_rc/year=2009/month=3/'
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=3);
ALTER TABLE alltypesmixedformat SET FILEFORMAT PARQUET;
LOAD DATA INPATH '/tmp/alltypes_parquet/year=2009/month=4'
OVERWRITE INTO TABLE alltypesmixedformat PARTITION (year=2009, month=4);
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1)
SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=1)
SET FILEFORMAT TEXTFILE;
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
SET SERDEPROPERTIES('field.delim'=',', 'escape.delim'='\\');
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=2)
SET FILEFORMAT SEQUENCEFILE;
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=3)
SET FILEFORMAT RCFILE;
ALTER TABLE alltypesmixedformat PARTITION (year=2009, month=4)
SET FILEFORMAT PARQUET;
DROP TABLE IF EXISTS functional_parquet.chars_formats;
CREATE EXTERNAL TABLE functional_parquet.chars_formats
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
STORED AS PARQUET
LOCATION '/test-warehouse/chars_formats_parquet';
DROP TABLE IF EXISTS functional_orc_def.chars_formats;
CREATE EXTERNAL TABLE functional_orc_def.chars_formats
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
STORED AS ORC
LOCATION '/test-warehouse/chars_formats_orc_def';
DROP TABLE IF EXISTS functional.chars_formats;
CREATE EXTERNAL TABLE functional.chars_formats
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
ROW FORMAT delimited fields terminated by ',' escaped by '\\'
STORED AS TEXTFILE
LOCATION '/test-warehouse/chars_formats_text';
DROP TABLE IF EXISTS functional_json.chars_formats;
CREATE EXTERNAL TABLE functional_json.chars_formats
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
STORED AS JSONFILE
LOCATION '/test-warehouse/chars_formats_json';
DROP TABLE IF EXISTS functional_avro_snap.chars_formats;
CREATE EXTERNAL TABLE functional_avro_snap.chars_formats
(cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
STORED AS AVRO
LOCATION '/test-warehouse/chars_formats_avro_snap'
TBLPROPERTIES ('avro.schema.literal'='{"type":"record",
"name":"CharTypesTest","doc":"Schema generated by Kite",
"fields":[
{"name":"cs","type":["null","string"], "doc":"Type inferred"},
{"name":"cl","type":["null","string"], "doc":"Type inferred"},
{"name":"vc","type":["null","string"], "doc":"Type inferred"}
]}');
---- Unsupported Impala table types
USE functional;
DROP VIEW IF EXISTS hive_view;
CREATE VIEW hive_view AS SELECT 1 AS int_col FROM alltypes limit 1;