mirror of
https://github.com/apache/impala.git
synced 2026-01-02 12:00:33 -05:00
This change adds support for adding SORT BY (...) clauses to CREATE TABLE and ALTER TABLE statements. Examples are: CREATE TABLE t (i INT, j INT, k INT) PARTITIONED BY (l INT) SORT BY (i, j); CREATE TABLE t SORT BY (int_col,id) LIKE u; CREATE TABLE t LIKE PARQUET '/foo' SORT BY (id,zip); ALTER TABLE t SORT BY (int_col,id); ALTER TABLE t SORT BY (); Sort columns can only be specified for Hdfs tables and effectiveness may vary based on storage type; for example TEXT tables will not see improved compression. The SORT BY clause must not contain clustering columns. The columns in the SORT BY clause are stored in the 'sort.columns' table property and will result in an additional SORT node being added to the plan before the final table sink. Specifying sort columns also enables clustering during inserts, so the SORT node will contain all partitioning columns first, followed by the sort columns. We do this because sort columns add a SORT node to the plan and adding the clustering columns to the SORT node is cheap. Sort columns supersede the sortby() hint, which we will remove in a subsequent change (IMPALA-5144). Until then, it is possible to specify sort columns using both ways at the same time and the column lists will be concatenated. Change-Id: I08834f38a941786ab45a4381c2732d929a934f75 Reviewed-on: http://gerrit.cloudera.org:8080/6495 Reviewed-by: Lars Volker <lv@cloudera.com> Tested-by: Impala Public Jenkins
286 lines
15 KiB
Plaintext
286 lines
15 KiB
Plaintext
====
|
|
---- QUERY
|
|
create table $DATABASE.testtbl(i int, s string COMMENT 'String col') STORED AS TEXTFILE
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Make sure creating a table with the same name doesn't throw an error when
|
|
# IF NOT EXISTS is specified.
|
|
create table if not exists $DATABASE.testtbl(i int, s string)
|
|
ROW FORMAT DELIMITED
|
|
FIELDS TERMINATED BY '\t'
|
|
ESCAPED BY '\\'
|
|
LINES TERMINATED BY '\n'
|
|
STORED AS TEXTFILE
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show tables in $DATABASE
|
|
---- RESULTS
|
|
'testtbl'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
describe $DATABASE.testtbl
|
|
---- RESULTS
|
|
'i','int',''
|
|
's','string','String col'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
insert overwrite table $DATABASE.testtbl SELECT 1, 'Hi'
|
|
from functional.alltypes limit 10
|
|
---- RESULTS
|
|
: 10
|
|
====
|
|
---- QUERY
|
|
select * from $DATABASE.testtbl
|
|
---- RESULTS
|
|
1,'Hi'
|
|
1,'Hi'
|
|
1,'Hi'
|
|
1,'Hi'
|
|
1,'Hi'
|
|
1,'Hi'
|
|
1,'Hi'
|
|
1,'Hi'
|
|
1,'Hi'
|
|
1,'Hi'
|
|
---- TYPES
|
|
INT, STRING
|
|
====
|
|
---- QUERY
|
|
create table $DATABASE.testtbl_part(i int, s string) PARTITIONED BY (id int comment 'C')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Partition columns are displayed as part of DESCRIBE <table>
|
|
describe $DATABASE.testtbl_part
|
|
---- RESULTS
|
|
'i','int',''
|
|
's','string',''
|
|
'id','int','C'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
insert overwrite table $DATABASE.testtbl_part partition(id=1)
|
|
select 10, 'Ten' from functional.alltypes limit 1
|
|
---- RESULTS
|
|
id=1/: 1
|
|
====
|
|
---- QUERY
|
|
insert overwrite table $DATABASE.testtbl_part partition(id=2)
|
|
select 20, 'Twenty' from functional.alltypes limit 2
|
|
---- RESULTS
|
|
id=2/: 2
|
|
====
|
|
---- QUERY
|
|
select * from $DATABASE.testtbl_part
|
|
---- RESULTS
|
|
10,'Ten',1
|
|
20,'Twenty',2
|
|
20,'Twenty',2
|
|
---- TYPES
|
|
INT, STRING, INT
|
|
====
|
|
---- QUERY
|
|
select * from $DATABASE.testtbl_part where id = 1
|
|
---- RESULTS
|
|
10,'Ten',1
|
|
---- TYPES
|
|
INT, STRING, INT
|
|
====
|
|
---- QUERY
|
|
show tables
|
|
---- RESULTS
|
|
'testtbl'
|
|
'testtbl_part'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Make sure we create the table in the proper database after a "use"
|
|
create table testtbl2(f float, d double) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show tables
|
|
---- RESULTS
|
|
'testtbl'
|
|
'testtbl2'
|
|
'testtbl_part'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
drop table testtbl2
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show tables
|
|
---- RESULTS
|
|
'testtbl'
|
|
'testtbl_part'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Test creating an unpartitioned Avro table without column definitions.
|
|
create table avro_alltypes_nopart
|
|
with serdeproperties
|
|
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
|
|
stored as avro
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe avro_alltypes_nopart
|
|
---- RESULTS
|
|
'bigint_col','bigint','from deserializer'
|
|
'bool_col','boolean','from deserializer'
|
|
'date_string_col','string','from deserializer'
|
|
'double_col','double','from deserializer'
|
|
'float_col','float','from deserializer'
|
|
'id','int','from deserializer'
|
|
'int_col','int','from deserializer'
|
|
'smallint_col','int','from deserializer'
|
|
'string_col','string','from deserializer'
|
|
'timestamp_col','string','from deserializer'
|
|
'tinyint_col','int','from deserializer'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test creating a partitioned Avro table without column definitions.
|
|
create table avro_alltypes_part
|
|
partitioned by (year int, month int)
|
|
with serdeproperties
|
|
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
|
|
stored as avro
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
describe avro_alltypes_part
|
|
---- RESULTS
|
|
'bigint_col','bigint','from deserializer'
|
|
'bool_col','boolean','from deserializer'
|
|
'date_string_col','string','from deserializer'
|
|
'double_col','double','from deserializer'
|
|
'float_col','float','from deserializer'
|
|
'id','int','from deserializer'
|
|
'int_col','int','from deserializer'
|
|
'month','int',''
|
|
'smallint_col','int','from deserializer'
|
|
'string_col','string','from deserializer'
|
|
'timestamp_col','string','from deserializer'
|
|
'tinyint_col','int','from deserializer'
|
|
'year','int',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
drop table avro_alltypes_part
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
drop table avro_alltypes_nopart
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
drop table testtbl
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
drop table testtbl_part
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show tables
|
|
---- RESULTS
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
drop table if exists non_existent_db.tbl
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Test table creation with tblproperty and serdeproperty lengths just within limits
|
|
create table $DATABASE.short_properties (i int)
|
|
with serdeproperties(
|
|
'keykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeyk'
|
|
=
|
|
'valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue'
|
|
) tblproperties(
|
|
'keykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeykeyk'
|
|
=
|
|
'valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue'
|
|
)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1740: Test setting the skip.header.line.count tblproperty
|
|
create table skip_header_test_a (i1 integer) tblproperties('skip.header.line.count'='2')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1740: Test setting the skip.header.line.count tblproperty on a Parquet table
|
|
create table skip_header_test_d (i1 integer) stored as parquet
|
|
tblproperties('skip.header.line.count'='2')
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1740: Test setting an invalid skip.header.line.count tblproperty
|
|
create table skip_header_test_b (i1 integer) tblproperties('skip.header.line.count'='-1')
|
|
---- CATCH
|
|
Invalid value for table property skip.header.line.count:
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1740: Test setting an invalid skip.header.line.count tblproperty
|
|
create table skip_header_test_c (i1 integer) tblproperties('skip.header.line.count'='foo')
|
|
---- CATCH
|
|
Invalid value for table property skip.header.line.count:
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2820: Test keywords as struct-field names. All field names below are
|
|
# Impala keywords, but only some of them are quoted to show that a mix of
|
|
# quoted/unquoted identifiers is possible.
|
|
create table $DATABASE.struct_keywords (
|
|
s struct<table:int,`comment`:string,`select`:decimal(8,2)>,
|
|
a array<struct<`replace`:boolean,`location`:varchar(10)>>,
|
|
m map<string,struct<`fields`:int,from:timestamp>>
|
|
)
|
|
partitioned by (year int, month int);
|
|
describe $DATABASE.struct_keywords;
|
|
---- RESULTS
|
|
's','struct<\n table:int,\n comment:string,\n select:decimal(8,2)\n>',''
|
|
'a','array<struct<\n replace:boolean,\n location:varchar(10)\n>>',''
|
|
'm','map<string,struct<\n fields:int,\n from:timestamp\n>>',''
|
|
'year','int',''
|
|
'month','int',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
select
|
|
s.`table`, s.`comment`, s.`select`,
|
|
a.`replace`, a.`location`,
|
|
m.key, m.`fields`, m.`from`,
|
|
year, month
|
|
from $DATABASE.struct_keywords t, t.a, t.m
|
|
---- RESULTS
|
|
---- TYPES
|
|
INT, STRING, DECIMAL, BOOLEAN, STRING, STRING, INT, TIMESTAMP, INT, INT
|
|
====
|
|
---- QUERY
|
|
# Make sure that specifying sort columns sets the 'sort.columns' property correctly.
|
|
create table sortbytest (i int, d double, b boolean) sort by (d, i);
|
|
describe formatted sortbytest;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','sort.columns ','d,i '
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|