mirror of
https://github.com/apache/impala.git
synced 2026-01-04 00:00:56 -05:00
This change adds support for adding SORT BY (...) clauses to CREATE TABLE and ALTER TABLE statements. Examples are: CREATE TABLE t (i INT, j INT, k INT) PARTITIONED BY (l INT) SORT BY (i, j); CREATE TABLE t SORT BY (int_col,id) LIKE u; CREATE TABLE t LIKE PARQUET '/foo' SORT BY (id,zip); ALTER TABLE t SORT BY (int_col,id); ALTER TABLE t SORT BY (); Sort columns can only be specified for Hdfs tables and effectiveness may vary based on storage type; for example TEXT tables will not see improved compression. The SORT BY clause must not contain clustering columns. The columns in the SORT BY clause are stored in the 'sort.columns' table property and will result in an additional SORT node being added to the plan before the final table sink. Specifying sort columns also enables clustering during inserts, so the SORT node will contain all partitioning columns first, followed by the sort columns. We do this because sort columns add a SORT node to the plan and adding the clustering columns to the SORT node is cheap. Sort columns supersede the sortby() hint, which we will remove in a subsequent change (IMPALA-5144). Until then, it is possible to specify sort columns using both ways at the same time and the column lists will be concatenated. Change-Id: I08834f38a941786ab45a4381c2732d929a934f75 Reviewed-on: http://gerrit.cloudera.org:8080/6495 Reviewed-by: Lars Volker <lv@cloudera.com> Tested-by: Impala Public Jenkins
182 lines
6.6 KiB
Plaintext
182 lines
6.6 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Create a table whose columns are derived from the schema of decimal.parquet.
create table $DATABASE.temp_decimal_table like parquet
|
|
'$FILESYSTEM_PREFIX/test-warehouse/schemas/decimal.parquet'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Check the inferred schema: three decimal columns with their precision and
# scale, each carrying the column comment 'Inferred from Parquet file.'.
describe $DATABASE.temp_decimal_table
|
|
---- RESULTS
|
|
'd32','decimal(3,2)','Inferred from Parquet file.'
|
|
'd11','decimal(1,1)','Inferred from Parquet file.'
|
|
'd1015','decimal(15,10)','Inferred from Parquet file.'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Create a table whose columns are derived from the schema of
# zipcode_incomes.parquet.
create table $DATABASE.like_zipcodes_file like parquet
|
|
'$FILESYSTEM_PREFIX/test-warehouse/schemas/zipcode_incomes.parquet'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Check the inferred schema: four string columns followed by the int column
# 'income', each carrying the column comment 'Inferred from Parquet file.'.
describe $DATABASE.like_zipcodes_file
|
|
---- RESULTS
|
|
'id','string','Inferred from Parquet file.'
|
|
'zip','string','Inferred from Parquet file.'
|
|
'description1','string','Inferred from Parquet file.'
|
|
'description2','string','Inferred from Parquet file.'
|
|
'income','int','Inferred from Parquet file.'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Create a table whose columns are derived from the schema of
# alltypestiny.parquet.
create table $DATABASE.like_alltypestiny_file like parquet
|
|
'$FILESYSTEM_PREFIX/test-warehouse/schemas/alltypestiny.parquet'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Check the inferred schema of the table created from alltypestiny.parquet.
# Note that tinyint_col and smallint_col are expected to be reported as 'int'
# rather than as narrower integer types.
# NOTE(review): presumably the Parquet file stores both as 32-bit integers;
# confirm against the schema file.
describe $DATABASE.like_alltypestiny_file
|
|
---- RESULTS
|
|
'id','int','Inferred from Parquet file.'
|
|
'bool_col','boolean','Inferred from Parquet file.'
|
|
'tinyint_col','int','Inferred from Parquet file.'
|
|
'smallint_col','int','Inferred from Parquet file.'
|
|
'int_col','int','Inferred from Parquet file.'
|
|
'bigint_col','bigint','Inferred from Parquet file.'
|
|
'float_col','float','Inferred from Parquet file.'
|
|
'double_col','double','Inferred from Parquet file.'
|
|
'date_string_col','string','Inferred from Parquet file.'
|
|
'string_col','string','Inferred from Parquet file.'
|
|
'timestamp_col','timestamp','Inferred from Parquet file.'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Make sure creating a table with the same name doesn't throw an error when
|
|
# IF NOT EXISTS is specified.
|
|
# The statement deliberately points at zipcode_incomes.parquet; the describe
# test that follows expects the table to still have the alltypestiny columns.
create table if not exists $DATABASE.like_alltypestiny_file like parquet
|
|
'$FILESYSTEM_PREFIX/test-warehouse/schemas/zipcode_incomes.parquet'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Should not have changed since last statement was IF NOT EXISTS.
|
|
# The expected rows are the same alltypestiny columns as in the earlier
# describe of this table, not the zipcode_incomes columns.
describe $DATABASE.like_alltypestiny_file
|
|
---- RESULTS
|
|
'id','int','Inferred from Parquet file.'
|
|
'bool_col','boolean','Inferred from Parquet file.'
|
|
'tinyint_col','int','Inferred from Parquet file.'
|
|
'smallint_col','int','Inferred from Parquet file.'
|
|
'int_col','int','Inferred from Parquet file.'
|
|
'bigint_col','bigint','Inferred from Parquet file.'
|
|
'float_col','float','Inferred from Parquet file.'
|
|
'double_col','double','Inferred from Parquet file.'
|
|
'date_string_col','string','Inferred from Parquet file.'
|
|
'string_col','string','Inferred from Parquet file.'
|
|
'timestamp_col','timestamp','Inferred from Parquet file.'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Remove any leftover allcomplextypes_clone table before the create test that
# follows.
# NOTE(review): unlike the tests above, the table name is not prefixed with
# $DATABASE, so this relies on the session's current database; confirm that
# is intended.
drop table if exists allcomplextypes_clone
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Clone the column definitions of functional.allcomplextypes into a new table
# stored as Parquet. The new table's name is unqualified.
create table allcomplextypes_clone like functional.allcomplextypes
|
|
stored as parquet
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Check that the clone exposes the complex-typed columns (arrays, maps and
# structs, including nested combinations) followed by 'year' and 'month'.
# Column comments are expected to be empty.
# Struct types are expected in pretty-printed form: fields on separate lines
# (written as a literal \n in the expected rows), indented by two spaces per
# nesting level.
# NOTE(review): the indentation width was restored from collapsed whitespace;
# confirm against actual describe output.
describe allcomplextypes_clone
|
|
---- RESULTS
|
|
'id','int',''
|
|
'int_array_col','array<int>',''
|
|
'array_array_col','array<array<int>>',''
|
|
'map_array_col','array<map<string,int>>',''
|
|
'struct_array_col','array<struct<\n  f1:bigint,\n  f2:string\n>>',''
|
|
'int_map_col','map<string,int>',''
|
|
'array_map_col','map<string,array<int>>',''
|
|
'map_map_col','map<string,map<string,int>>',''
|
|
'struct_map_col','map<string,struct<\n  f1:bigint,\n  f2:string\n>>',''
|
|
'int_struct_col','struct<\n  f1:int,\n  f2:int\n>',''
|
|
'complex_struct_col','struct<\n  f1:int,\n  f2:array<int>,\n  f3:map<string,int>\n>',''
|
|
'nested_struct_col','struct<\n  f1:int,\n  f2:struct<\n    f11:bigint,\n    f12:struct<\n      f21:bigint\n    >\n  >\n>',''
|
|
'complex_nested_struct_col','struct<\n  f1:int,\n  f2:array<struct<\n    f11:bigint,\n    f12:map<string,struct<\n      f21:bigint\n    >>\n  >>\n>',''
|
|
'year','int',''
|
|
'month','int',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Clean up the clone table used by the preceding describe test. There is no
# IF EXISTS here, so the table must exist at this point.
drop table allcomplextypes_clone
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Remove any leftover temp_legacy_table before the create test that follows.
drop table if exists $DATABASE.temp_legacy_table
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Create a table whose columns are derived from the schema of
# legacy_nested.parquet.
# NOTE(review): the file name suggests an older encoding of nested types;
# that is not visible from this test file, so confirm against the data file.
create table $DATABASE.temp_legacy_table like parquet
|
|
'$FILESYSTEM_PREFIX/test-warehouse/schemas/legacy_nested.parquet'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Check the schema inferred from legacy_nested.parquet: a bigint column plus
# struct, array and map columns, each carrying the column comment
# 'Inferred from Parquet file.'. The expected rows are the same as those
# expected for the table created from modern_nested.parquet.
# Struct types are expected in pretty-printed form: fields on separate lines
# (written as a literal \n in the expected rows), indented by two spaces per
# nesting level.
# NOTE(review): the indentation width was restored from collapsed whitespace;
# confirm against actual describe output.
describe $DATABASE.temp_legacy_table
|
|
---- RESULTS
|
|
't_long','bigint','Inferred from Parquet file.'
|
|
't_struct','struct<\n  f_int:int,\n  t_struct:struct<\n    f_int:int,\n    f_int2:int\n  >,\n  int_arr:array<int>,\n  int_map:map<string,int>\n>','Inferred from Parquet file.'
|
|
't_array_basic','array<int>','Inferred from Parquet file.'
|
|
't_array_struct','array<struct<\n  f_int1:int,\n  f_int2:int,\n  f_int3:int\n>>','Inferred from Parquet file.'
|
|
't_array_array','array<array<int>>','Inferred from Parquet file.'
|
|
't_array_map','array<map<string,int>>','Inferred from Parquet file.'
|
|
'map_int','map<string,int>','Inferred from Parquet file.'
|
|
'map_struct','map<string,struct<\n  f_int:int,\n  f_int2:int\n>>','Inferred from Parquet file.'
|
|
'map_array','map<string,array<int>>','Inferred from Parquet file.'
|
|
'map_map','map<string,map<string,int>>','Inferred from Parquet file.'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Clean up the table used by the preceding describe test.
drop table if exists $DATABASE.temp_legacy_table
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Remove any leftover temp_modern_table before the create test that follows.
drop table if exists $DATABASE.temp_modern_table
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Create a table whose columns are derived from the schema of
# modern_nested.parquet.
create table $DATABASE.temp_modern_table like parquet
|
|
'$FILESYSTEM_PREFIX/test-warehouse/schemas/modern_nested.parquet'
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Check the schema inferred from modern_nested.parquet: a bigint column plus
# struct, array and map columns, each carrying the column comment
# 'Inferred from Parquet file.'. The expected rows are the same as those
# expected for the table created from legacy_nested.parquet.
# Struct types are expected in pretty-printed form: fields on separate lines
# (written as a literal \n in the expected rows), indented by two spaces per
# nesting level.
# NOTE(review): the indentation width was restored from collapsed whitespace;
# confirm against actual describe output.
describe $DATABASE.temp_modern_table
|
|
---- RESULTS
|
|
't_long','bigint','Inferred from Parquet file.'
|
|
't_struct','struct<\n  f_int:int,\n  t_struct:struct<\n    f_int:int,\n    f_int2:int\n  >,\n  int_arr:array<int>,\n  int_map:map<string,int>\n>','Inferred from Parquet file.'
|
|
't_array_basic','array<int>','Inferred from Parquet file.'
|
|
't_array_struct','array<struct<\n  f_int1:int,\n  f_int2:int,\n  f_int3:int\n>>','Inferred from Parquet file.'
|
|
't_array_array','array<array<int>>','Inferred from Parquet file.'
|
|
't_array_map','array<map<string,int>>','Inferred from Parquet file.'
|
|
'map_int','map<string,int>','Inferred from Parquet file.'
|
|
'map_struct','map<string,struct<\n  f_int:int,\n  f_int2:int\n>>','Inferred from Parquet file.'
|
|
'map_array','map<string,array<int>>','Inferred from Parquet file.'
|
|
'map_map','map<string,map<string,int>>','Inferred from Parquet file.'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Clean up the table used by the preceding describe test.
drop table if exists $DATABASE.temp_modern_table
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Test adding sort.columns when creating a table like a parquet file.
|
|
# Two statements share this QUERY section: the create (schema taken from
# zipcode_incomes.parquet, sort columns id and zip, stored as text) and a
# describe formatted of the new table. The expected row below is the
# 'sort.columns' table property from the describe formatted output.
# NOTE(review): VERIFY_IS_SUBSET presumably only requires the listed row to
# appear somewhere in the output; confirm against the test harness.
# NOTE(review): describe formatted pads its fields with trailing spaces; the
# padding below was restored to 20-character fields from collapsed whitespace;
# confirm the width against actual output.
create table $DATABASE.sorted_zipcodes_file like parquet
|
|
'$FILESYSTEM_PREFIX/test-warehouse/schemas/zipcode_incomes.parquet'
|
|
sort by (id, zip) stored as textfile;
|
|
describe formatted $DATABASE.sorted_zipcodes_file;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','sort.columns        ','id,zip              '
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|