Files
impala/testdata/datasets/tpch/tpch_schema_template.sql
Dimitris Tsirogiannis 041fa6d946 IMPALA-3719: Simplify CREATE TABLE statements with Kudu tables
With this commit we simplify the syntax and handling of CREATE TABLE
statements for both managed and external Kudu tables.

Syntax example:
CREATE TABLE foo(a INT, b STRING, PRIMARY KEY (a, b))
DISTRIBUTE BY HASH (a) INTO 3 BUCKETS,
RANGE (b) SPLIT ROWS (('abc', 'def'))
STORED AS KUDU

Changes:
1) Remove the requirement to specify table properties such as key
   columns in tblproperties.
2) Read table schema (column definitions, primary keys, and distribution
   schemes) from Kudu instead of the HMS.
3) For external tables, the Kudu table is now required to exist at the
   time of creation in Impala.
4) Disallow table properties that could conflict with an existing
   table. Ex: key_columns cannot be specified.
5) Add KUDU as a file format.
6) Add a startup flag to impalad to specify the default Kudu master
   addresses. The flag is used as the default value for the table
   property kudu_master_addresses but it can still be overriden
   using TBLPROPERTIES.
7) Fix a post merge issue (IMPALA-3178) where DROP DATABASE CASCADE
   wasn't implemented for Kudu tables and silently ignored. The Kudu
   tables wouldn't be removed in Kudu.
8) Remove DDL delegates. There was only one functional delegate (for
   Kudu) the existence of the other delegate and the use of delegates in
   general has led to confusion. The Kudu delegate only exists to provide
   functionality missing from Hive.
9) Add PRIMARY KEY at the column and table level. This syntax is fairly
   standard. When used at the column level, only one column can be
   marked as a key. When used at the table level, multiple columns can
   be used as a key. Only Kudu tables are allowed to use PRIMARY KEY.
   The old "kudu.key_columns" table property is no longer accepted
   though it is still used internally. "PRIMARY" is now a keyword.
   The ident style declaration is used for "KEY" because it is also used
   for nested map types.
10) For managed tables, infer a Kudu table name if none was given.
   The table property "kudu.table_name" is optional for managed tables
   and is required for external tables. If for a managed table a Kudu
   table name is not provided, a table name will be generated based
   on the HMS database and table name.
11) Use Kudu master as the source of truth for table metadata instead
   of HMS when a table is loaded or refreshed. Table/column metadata
   are cached in the catalog and are stored in HMS in order to be
   able to use table and column statistics.

Change-Id: I7b9d51b2720ab57649abdb7d5c710ea04ff50dc1
Reviewed-on: http://gerrit.cloudera.org:8080/4414
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins
2016-10-21 10:52:25 +00:00

280 lines
7.5 KiB
SQL

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# For details on this file format please see hive-benchmark_schema_template.sql
====
---- DATASET
tpch
---- BASE_TABLE_NAME
lineitem
---- COLUMNS
L_ORDERKEY BIGINT
L_PARTKEY BIGINT
L_SUPPKEY BIGINT
L_LINENUMBER INT
L_QUANTITY DECIMAL(12,2)
L_EXTENDEDPRICE DECIMAL(12,2)
L_DISCOUNT DECIMAL(12,2)
L_TAX DECIMAL(12,2)
L_RETURNFLAG STRING
L_LINESTATUS STRING
L_SHIPDATE STRING
L_COMMITDATE STRING
L_RECEIPTDATE STRING
L_SHIPINSTRUCT STRING
L_SHIPMODE STRING
L_COMMENT STRING
---- ROW_FORMAT
DELIMITED FIELDS TERMINATED BY '|'
---- CREATE_KUDU
create table if not exists {db_name}{db_suffix}.{table_name} (
L_ORDERKEY BIGINT,
L_PARTKEY BIGINT,
L_SUPPKEY BIGINT,
L_LINENUMBER INT,
L_QUANTITY DOUBLE,
L_EXTENDEDPRICE DOUBLE,
L_DISCOUNT DOUBLE,
L_TAX DOUBLE,
L_RETURNFLAG STRING,
L_LINESTATUS STRING,
L_SHIPDATE STRING,
L_COMMITDATE STRING,
L_RECEIPTDATE STRING,
L_SHIPINSTRUCT STRING,
L_SHIPMODE STRING,
L_COMMENT STRING,
PRIMARY KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER)
)
distribute by hash (l_orderkey) into 9 buckets stored as kudu;
---- DEPENDENT_LOAD
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
---- LOAD
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
====
---- DATASET
tpch
---- BASE_TABLE_NAME
part
---- COLUMNS
P_PARTKEY BIGINT
P_NAME STRING
P_MFGR STRING
P_BRAND STRING
P_TYPE STRING
P_SIZE INT
P_CONTAINER STRING
P_RETAILPRICE DECIMAL(12,2)
P_COMMENT STRING
---- ROW_FORMAT
DELIMITED FIELDS TERMINATED BY '|'
---- CREATE_KUDU
create table if not exists {db_name}{db_suffix}.{table_name} (
P_PARTKEY BIGINT PRIMARY KEY,
P_NAME STRING,
P_MFGR STRING,
P_BRAND STRING,
P_TYPE STRING,
P_SIZE INT,
P_CONTAINER STRING,
P_RETAILPRICE DOUBLE,
P_COMMENT STRING
)
distribute by hash (p_partkey) into 9 buckets stored as kudu;
---- DEPENDENT_LOAD
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
---- LOAD
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
====
---- DATASET
tpch
---- BASE_TABLE_NAME
partsupp
---- COLUMNS
PS_PARTKEY BIGINT
PS_SUPPKEY BIGINT
PS_AVAILQTY INT
PS_SUPPLYCOST DECIMAL(12,2)
PS_COMMENT STRING
---- ROW_FORMAT
DELIMITED FIELDS TERMINATED BY '|'
---- CREATE_KUDU
create table if not exists {db_name}{db_suffix}.{table_name} (
PS_PARTKEY BIGINT,
PS_SUPPKEY BIGINT,
PS_AVAILQTY BIGINT,
PS_SUPPLYCOST DOUBLE,
PS_COMMENT STRING,
PRIMARY KEY(PS_PARTKEY, PS_SUPPKEY)
)
distribute by hash (ps_partkey, ps_suppkey) into 9 buckets stored as kudu;
---- DEPENDENT_LOAD
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
---- LOAD
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
====
---- DATASET
tpch
---- BASE_TABLE_NAME
supplier
---- COLUMNS
S_SUPPKEY BIGINT
S_NAME STRING
S_ADDRESS STRING
S_NATIONKEY SMALLINT
S_PHONE STRING
S_ACCTBAL DECIMAL(12,2)
S_COMMENT STRING
---- ROW_FORMAT
DELIMITED FIELDS TERMINATED BY '|'
---- CREATE_KUDU
create table if not exists {db_name}{db_suffix}.{table_name} (
S_SUPPKEY BIGINT PRIMARY KEY,
S_NAME STRING,
S_ADDRESS STRING,
S_NATIONKEY SMALLINT,
S_PHONE STRING,
S_ACCTBAL DOUBLE,
S_COMMENT STRING
)
distribute by hash (s_suppkey) into 9 buckets stored as kudu;
---- DEPENDENT_LOAD
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
---- LOAD
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
====
---- DATASET
tpch
---- BASE_TABLE_NAME
nation
---- COLUMNS
N_NATIONKEY SMALLINT
N_NAME STRING
N_REGIONKEY SMALLINT
N_COMMENT STRING
---- ROW_FORMAT
DELIMITED FIELDS TERMINATED BY '|'
---- CREATE_KUDU
create table if not exists {db_name}{db_suffix}.{table_name} (
N_NATIONKEY SMALLINT PRIMARY KEY,
N_NAME STRING,
N_REGIONKEY SMALLINT,
N_COMMENT STRING
)
distribute by hash (n_nationkey) into 9 buckets stored as kudu;
---- DEPENDENT_LOAD
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
---- LOAD
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
====
---- DATASET
tpch
---- BASE_TABLE_NAME
region
---- COLUMNS
R_REGIONKEY SMALLINT
R_NAME STRING
R_COMMENT STRING
---- ROW_FORMAT
DELIMITED FIELDS TERMINATED BY '|'
---- CREATE_KUDU
create table if not exists {db_name}{db_suffix}.{table_name} (
R_REGIONKEY SMALLINT PRIMARY KEY,
R_NAME STRING,
R_COMMENT STRING
)
distribute by hash (r_regionkey) into 9 buckets stored as kudu;
---- DEPENDENT_LOAD
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
---- LOAD
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
====
---- DATASET
tpch
---- BASE_TABLE_NAME
orders
---- COLUMNS
O_ORDERKEY BIGINT
O_CUSTKEY BIGINT
O_ORDERSTATUS STRING
O_TOTALPRICE DECIMAL(12,2)
O_ORDERDATE STRING
O_ORDERPRIORITY STRING
O_CLERK STRING
O_SHIPPRIORITY INT
O_COMMENT STRING
---- ROW_FORMAT
DELIMITED FIELDS TERMINATED BY '|'
---- CREATE_KUDU
create table if not exists {db_name}{db_suffix}.{table_name} (
O_ORDERKEY BIGINT PRIMARY KEY,
O_CUSTKEY BIGINT,
O_ORDERSTATUS STRING,
O_TOTALPRICE DOUBLE,
O_ORDERDATE STRING,
O_ORDERPRIORITY STRING,
O_CLERK STRING,
O_SHIPPRIORITY INT,
O_COMMENT STRING
)
distribute by hash (o_orderkey) into 9 buckets stored as kudu;
---- DEPENDENT_LOAD
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
---- LOAD
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
====
---- DATASET
tpch
---- BASE_TABLE_NAME
customer
---- COLUMNS
C_CUSTKEY BIGINT
C_NAME STRING
C_ADDRESS STRING
C_NATIONKEY SMALLINT
C_PHONE STRING
C_ACCTBAL DECIMAL(12,2)
C_MKTSEGMENT STRING
C_COMMENT STRING
---- ROW_FORMAT
DELIMITED FIELDS TERMINATED BY '|'
---- CREATE_KUDU
create table if not exists {db_name}{db_suffix}.{table_name} (
C_CUSTKEY BIGINT PRIMARY KEY,
C_NAME STRING,
C_ADDRESS STRING,
C_NATIONKEY SMALLINT,
C_PHONE STRING,
C_ACCTBAL DOUBLE,
C_MKTSEGMENT STRING,
C_COMMENT STRING
)
distribute by hash (c_custkey) into 9 buckets stored as kudu;
---- DEPENDENT_LOAD
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
---- LOAD
LOAD DATA LOCAL INPATH '{impala_home}/testdata/impala-data/{db_name}/{table_name}'
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
====