mirror of
https://github.com/apache/impala.git
synced 2026-01-06 15:01:43 -05:00
With this commit we simplify the syntax and handling of CREATE TABLE
statements for both managed and external Kudu tables.
Syntax example:
CREATE TABLE foo(a INT, b STRING, PRIMARY KEY (a, b))
DISTRIBUTE BY HASH (a) INTO 3 BUCKETS,
RANGE (b) SPLIT ROWS (('abc', 'def'))
STORED AS KUDU
Changes:
1) Remove the requirement to specify table properties such as key
columns in tblproperties.
2) Read table schema (column definitions, primary keys, and distribution
schemes) from Kudu instead of the HMS.
3) For external tables, the Kudu table is now required to exist at the
time of creation in Impala.
4) Disallow table properties that could conflict with an existing
table. Ex: key_columns cannot be specified.
5) Add KUDU as a file format.
6) Add a startup flag to impalad to specify the default Kudu master
addresses. The flag is used as the default value for the table
property kudu_master_addresses but it can still be overridden
using TBLPROPERTIES.
7) Fix a post-merge issue (IMPALA-3178) where DROP DATABASE CASCADE
wasn't implemented for Kudu tables and was silently ignored. The Kudu
tables wouldn't be removed in Kudu.
8) Remove DDL delegates. There was only one functional delegate (for
Kudu); the existence of the other delegate and the use of delegates in
general have led to confusion. The Kudu delegate only exists to provide
functionality missing from Hive.
9) Add PRIMARY KEY at the column and table level. This syntax is fairly
standard. When used at the column level, only one column can be
marked as a key. When used at the table level, multiple columns can
be used as a key. Only Kudu tables are allowed to use PRIMARY KEY.
The old "kudu.key_columns" table property is no longer accepted
though it is still used internally. "PRIMARY" is now a keyword.
The ident style declaration is used for "KEY" because it is also used
for nested map types.
10) For managed tables, infer a Kudu table name if none was given.
The table property "kudu.table_name" is optional for managed tables
and is required for external tables. If for a managed table a Kudu
table name is not provided, a table name will be generated based
on the HMS database and table name.
11) Use Kudu master as the source of truth for table metadata instead
of HMS when a table is loaded or refreshed. Table/column metadata
are cached in the catalog and are stored in HMS in order to be
able to use table and column statistics.
Change-Id: I7b9d51b2720ab57649abdb7d5c710ea04ff50dc1
Reviewed-on: http://gerrit.cloudera.org:8080/4414
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins
280 lines
7.5 KiB
SQL
280 lines
7.5 KiB
SQL
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
# For details on this file format please see hive-benchmark_schema_template.sql
|
|
====
|
|
---- DATASET
|
|
tpch
|
|
---- BASE_TABLE_NAME
|
|
lineitem
|
|
---- COLUMNS
|
|
L_ORDERKEY BIGINT
|
|
L_PARTKEY BIGINT
|
|
L_SUPPKEY BIGINT
|
|
L_LINENUMBER INT
|
|
L_QUANTITY DECIMAL(12,2)
|
|
L_EXTENDEDPRICE DECIMAL(12,2)
|
|
L_DISCOUNT DECIMAL(12,2)
|
|
L_TAX DECIMAL(12,2)
|
|
L_RETURNFLAG STRING
|
|
L_LINESTATUS STRING
|
|
L_SHIPDATE STRING
|
|
L_COMMITDATE STRING
|
|
L_RECEIPTDATE STRING
|
|
L_SHIPINSTRUCT STRING
|
|
L_SHIPMODE STRING
|
|
L_COMMENT STRING
|
|
---- ROW_FORMAT
|
|
DELIMITED FIELDS TERMINATED BY '|'
|
|
---- CREATE_KUDU
|
|
CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  -- Kudu flavor of the lineitem table. The four DECIMAL(12,2) columns of the
  -- base table (quantity, extended price, discount, tax) are declared DOUBLE.
  L_ORDERKEY BIGINT,
  L_PARTKEY BIGINT,
  L_SUPPKEY BIGINT,
  L_LINENUMBER INT,
  L_QUANTITY DOUBLE,
  L_EXTENDEDPRICE DOUBLE,
  L_DISCOUNT DOUBLE,
  L_TAX DOUBLE,
  L_RETURNFLAG STRING,
  L_LINESTATUS STRING,
  L_SHIPDATE STRING,
  L_COMMITDATE STRING,
  L_RECEIPTDATE STRING,
  L_SHIPINSTRUCT STRING,
  L_SHIPMODE STRING,
  L_COMMENT STRING,
  -- Composite primary key over the first four columns.
  PRIMARY KEY (L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER)
)
-- Hash-distributed on the order key only, not on the full primary key.
DISTRIBUTE BY HASH (l_orderkey) INTO 9 BUCKETS
STORED AS KUDU;
|
|
---- DEPENDENT_LOAD
|
|
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
|
|
---- LOAD
|
|
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
|
|
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
|
====
|
|
---- DATASET
|
|
tpch
|
|
---- BASE_TABLE_NAME
|
|
part
|
|
---- COLUMNS
|
|
P_PARTKEY BIGINT
|
|
P_NAME STRING
|
|
P_MFGR STRING
|
|
P_BRAND STRING
|
|
P_TYPE STRING
|
|
P_SIZE INT
|
|
P_CONTAINER STRING
|
|
P_RETAILPRICE DECIMAL(12,2)
|
|
P_COMMENT STRING
|
|
---- ROW_FORMAT
|
|
DELIMITED FIELDS TERMINATED BY '|'
|
|
---- CREATE_KUDU
|
|
CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  -- Kudu flavor of the part table. P_RETAILPRICE is DECIMAL(12,2) in the base
  -- table and DOUBLE here.
  P_PARTKEY BIGINT PRIMARY KEY,
  P_NAME STRING,
  P_MFGR STRING,
  P_BRAND STRING,
  P_TYPE STRING,
  P_SIZE INT,
  P_CONTAINER STRING,
  P_RETAILPRICE DOUBLE,
  P_COMMENT STRING
)
-- Hash-distributed on the single-column primary key.
DISTRIBUTE BY HASH (p_partkey) INTO 9 BUCKETS
STORED AS KUDU;
|
|
---- DEPENDENT_LOAD
|
|
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
|
|
---- LOAD
|
|
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
|
|
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
|
====
|
|
---- DATASET
|
|
tpch
|
|
---- BASE_TABLE_NAME
|
|
partsupp
|
|
---- COLUMNS
|
|
PS_PARTKEY BIGINT
|
|
PS_SUPPKEY BIGINT
|
|
PS_AVAILQTY INT
|
|
PS_SUPPLYCOST DECIMAL(12,2)
|
|
PS_COMMENT STRING
|
|
---- ROW_FORMAT
|
|
DELIMITED FIELDS TERMINATED BY '|'
|
|
---- CREATE_KUDU
|
|
CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  -- Kudu flavor of the partsupp table. PS_SUPPLYCOST is DECIMAL(12,2) in the
  -- base table and DOUBLE here.
  PS_PARTKEY BIGINT,
  PS_SUPPKEY BIGINT,
  -- INT, matching the type declared for this column in the base table COLUMNS
  -- list (it was previously widened to BIGINT for the Kudu table only).
  PS_AVAILQTY INT,
  PS_SUPPLYCOST DOUBLE,
  PS_COMMENT STRING,
  -- Composite primary key over part and supplier.
  PRIMARY KEY (PS_PARTKEY, PS_SUPPKEY)
)
-- Hash-distributed on both primary key columns.
DISTRIBUTE BY HASH (ps_partkey, ps_suppkey) INTO 9 BUCKETS
STORED AS KUDU;
|
|
---- DEPENDENT_LOAD
|
|
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
|
|
---- LOAD
|
|
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
|
|
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
|
====
|
|
---- DATASET
|
|
tpch
|
|
---- BASE_TABLE_NAME
|
|
supplier
|
|
---- COLUMNS
|
|
S_SUPPKEY BIGINT
|
|
S_NAME STRING
|
|
S_ADDRESS STRING
|
|
S_NATIONKEY SMALLINT
|
|
S_PHONE STRING
|
|
S_ACCTBAL DECIMAL(12,2)
|
|
S_COMMENT STRING
|
|
---- ROW_FORMAT
|
|
DELIMITED FIELDS TERMINATED BY '|'
|
|
---- CREATE_KUDU
|
|
CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  -- Kudu flavor of the supplier table. S_ACCTBAL is DECIMAL(12,2) in the base
  -- table and DOUBLE here.
  S_SUPPKEY BIGINT PRIMARY KEY,
  S_NAME STRING,
  S_ADDRESS STRING,
  S_NATIONKEY SMALLINT,
  S_PHONE STRING,
  S_ACCTBAL DOUBLE,
  S_COMMENT STRING
)
-- Hash-distributed on the single-column primary key.
DISTRIBUTE BY HASH (s_suppkey) INTO 9 BUCKETS
STORED AS KUDU;
|
|
---- DEPENDENT_LOAD
|
|
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
|
|
---- LOAD
|
|
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
|
|
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
|
====
|
|
---- DATASET
|
|
tpch
|
|
---- BASE_TABLE_NAME
|
|
nation
|
|
---- COLUMNS
|
|
N_NATIONKEY SMALLINT
|
|
N_NAME STRING
|
|
N_REGIONKEY SMALLINT
|
|
N_COMMENT STRING
|
|
---- ROW_FORMAT
|
|
DELIMITED FIELDS TERMINATED BY '|'
|
|
---- CREATE_KUDU
|
|
CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  -- Kudu flavor of the nation table. Column types are the same as in the base
  -- table COLUMNS list.
  N_NATIONKEY SMALLINT PRIMARY KEY,
  N_NAME STRING,
  N_REGIONKEY SMALLINT,
  N_COMMENT STRING
)
-- Hash-distributed on the single-column primary key.
DISTRIBUTE BY HASH (n_nationkey) INTO 9 BUCKETS
STORED AS KUDU;
|
|
---- DEPENDENT_LOAD
|
|
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
|
|
---- LOAD
|
|
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
|
|
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
|
====
|
|
---- DATASET
|
|
tpch
|
|
---- BASE_TABLE_NAME
|
|
region
|
|
---- COLUMNS
|
|
R_REGIONKEY SMALLINT
|
|
R_NAME STRING
|
|
R_COMMENT STRING
|
|
---- ROW_FORMAT
|
|
DELIMITED FIELDS TERMINATED BY '|'
|
|
---- CREATE_KUDU
|
|
CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  -- Kudu flavor of the region table. Column types are the same as in the base
  -- table COLUMNS list.
  R_REGIONKEY SMALLINT PRIMARY KEY,
  R_NAME STRING,
  R_COMMENT STRING
)
-- Hash-distributed on the single-column primary key.
DISTRIBUTE BY HASH (r_regionkey) INTO 9 BUCKETS
STORED AS KUDU;
|
|
---- DEPENDENT_LOAD
|
|
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
|
|
---- LOAD
|
|
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
|
|
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
|
====
|
|
---- DATASET
|
|
tpch
|
|
---- BASE_TABLE_NAME
|
|
orders
|
|
---- COLUMNS
|
|
O_ORDERKEY BIGINT
|
|
O_CUSTKEY BIGINT
|
|
O_ORDERSTATUS STRING
|
|
O_TOTALPRICE DECIMAL(12,2)
|
|
O_ORDERDATE STRING
|
|
O_ORDERPRIORITY STRING
|
|
O_CLERK STRING
|
|
O_SHIPPRIORITY INT
|
|
O_COMMENT STRING
|
|
---- ROW_FORMAT
|
|
DELIMITED FIELDS TERMINATED BY '|'
|
|
---- CREATE_KUDU
|
|
CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  -- Kudu flavor of the orders table. O_TOTALPRICE is DECIMAL(12,2) in the base
  -- table and DOUBLE here.
  O_ORDERKEY BIGINT PRIMARY KEY,
  O_CUSTKEY BIGINT,
  O_ORDERSTATUS STRING,
  O_TOTALPRICE DOUBLE,
  O_ORDERDATE STRING,
  O_ORDERPRIORITY STRING,
  O_CLERK STRING,
  O_SHIPPRIORITY INT,
  O_COMMENT STRING
)
-- Hash-distributed on the single-column primary key.
DISTRIBUTE BY HASH (o_orderkey) INTO 9 BUCKETS
STORED AS KUDU;
|
|
---- DEPENDENT_LOAD
|
|
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
|
|
---- LOAD
|
|
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
|
|
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
|
====
|
|
---- DATASET
|
|
tpch
|
|
---- BASE_TABLE_NAME
|
|
customer
|
|
---- COLUMNS
|
|
C_CUSTKEY BIGINT
|
|
C_NAME STRING
|
|
C_ADDRESS STRING
|
|
C_NATIONKEY SMALLINT
|
|
C_PHONE STRING
|
|
C_ACCTBAL DECIMAL(12,2)
|
|
C_MKTSEGMENT STRING
|
|
C_COMMENT STRING
|
|
---- ROW_FORMAT
|
|
DELIMITED FIELDS TERMINATED BY '|'
|
|
---- CREATE_KUDU
|
|
CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  -- Kudu flavor of the customer table. C_ACCTBAL is DECIMAL(12,2) in the base
  -- table and DOUBLE here.
  C_CUSTKEY BIGINT PRIMARY KEY,
  C_NAME STRING,
  C_ADDRESS STRING,
  C_NATIONKEY SMALLINT,
  C_PHONE STRING,
  C_ACCTBAL DOUBLE,
  C_MKTSEGMENT STRING,
  C_COMMENT STRING
)
-- Hash-distributed on the single-column primary key.
DISTRIBUTE BY HASH (c_custkey) INTO 9 BUCKETS
STORED AS KUDU;
|
|
---- DEPENDENT_LOAD
|
|
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
|
|
---- LOAD
|
|
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
|
|
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
|
====
|