Files
impala/testdata/workloads/functional-query/queries/QueryTest/create-table-like-table.test
Lars Volker 1ada9dac88 IMPALA-4166: Add SORT BY sql clause
This change adds support for adding SORT BY (...) clauses to CREATE
TABLE and ALTER TABLE statements. Examples are:

CREATE TABLE t (i INT, j INT, k INT) PARTITIONED BY (l INT) SORT BY (i, j);
CREATE TABLE t SORT BY (int_col,id) LIKE u;
CREATE TABLE t LIKE PARQUET '/foo' SORT BY (id,zip);

ALTER TABLE t SORT BY (int_col,id);
ALTER TABLE t SORT BY ();

Sort columns can only be specified for Hdfs tables and effectiveness may
vary based on storage type; for example TEXT tables will not see
improved compression. The SORT BY clause must not contain clustering
columns. The columns in the SORT BY clause are stored in the
'sort.columns' table property and will result in an additional SORT node
being added to the plan before the final table sink. Specifying sort
columns also enables clustering during inserts, so the SORT node will
contain all partitioning columns first, followed by the sort columns. We
do this because sort columns add a SORT node to the plan and adding the
clustering columns to the SORT node is cheap.

Sort columns supersede the sortby() hint, which we will remove in a
subsequent change (IMPALA-5144). Until then, sort columns can be specified
through both mechanisms at the same time, in which case the two column
lists are concatenated.

Change-Id: I08834f38a941786ab45a4381c2732d929a934f75
Reviewed-on: http://gerrit.cloudera.org:8080/6495
Reviewed-by: Lars Volker <lv@cloudera.com>
Tested-by: Impala Public Jenkins
2017-05-12 15:43:30 +00:00

243 lines
6.2 KiB
Plaintext

====
---- QUERY
# CREATE TABLE LIKE on partitioned table
# The source table is functional_seq_snap.alltypes; the new table is given an
# explicit STORED AS PARQUET clause.
create table alltypes_test like functional_seq_snap.alltypes
stored as parquet
---- RESULTS
====
---- QUERY
# Make sure no data exists for this table
# The expected count right after creation is 0.
select count(*) from alltypes_test
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# Should be able to insert into this table
# Writes one static partition (year=2009, month=4). The select list leaves out
# year and month because the PARTITION clause supplies both as constants.
# The expected result line reports 300 rows for that partition.
insert overwrite table alltypes_test
partition (year=2009, month=4)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes where year=2009 and month=4
---- RESULTS
year=2009/month=4/: 300
====
---- QUERY
# Make sure we can read the new data.
# The count has to match the 300 rows reported by the preceding insert.
select count(*) from alltypes_test
---- RESULTS
300
---- TYPES
BIGINT
====
---- QUERY
# CREATE TABLE LIKE on a view
# No STORED AS clause is given; the table stats check further down expects the
# resulting table to report FORMAT 'TEXT'.
create table like_view like functional.view_view
---- RESULTS
====
---- QUERY
# The new table is expected to expose 13 columns, each with an empty comment.
describe like_view
---- RESULTS
'id','int',''
'bool_col','boolean',''
'tinyint_col','tinyint',''
'smallint_col','smallint',''
'int_col','int',''
'bigint_col','bigint',''
'float_col','float',''
'double_col','double',''
'date_string_col','string',''
'string_col','string',''
'timestamp_col','timestamp',''
'year','int',''
'month','int',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Expected for the freshly created table: #ROWS of -1, zero files, size '0B',
# not cached, FORMAT 'TEXT'. LOCATION is matched with a catch-all regex rather
# than a literal path.
show table stats like_view
---- LABELS
#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
-1,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
---- TYPES
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# CREATE TABLE LIKE on the same view, this time with an explicit STORED AS PARQUET.
create table like_view_parquet like functional.view_view stored as parquet
---- RESULTS
====
---- QUERY
# Same expectations as for like_view, except that FORMAT must be 'PARQUET'.
show table stats like_view_parquet
---- LABELS
#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
-1,0,'0B','NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
---- TYPES
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# This should copy the file format from the source table (rc)
# The table is external and LOCATION points at
# $FILESYSTEM_PREFIX/test-warehouse/jointbl_rc_gzip -- presumably the data directory
# of the source table (TODO confirm), which would explain why the select below
# expects rows without any insert into jointbl_rc_like.
create external table jointbl_rc_like like functional_rc_gzip.jointbl
location '$FILESYSTEM_PREFIX/test-warehouse/jointbl_rc_gzip'
---- RESULTS
====
---- QUERY
# should get some results back
# ORDER BY test_id with LIMIT 3 pins which three rows are compared.
select * from jointbl_rc_like order by test_id limit 3
---- RESULTS
1001,'Name1',94611,5000
1002,'Name2',94611,5000
1003,'Name3',94611,5000
---- TYPES
BIGINT, STRING, INT, INT
====
---- QUERY
# CREATE TABLE LIKE on unpartitioned table.
create table jointbl_like like functional.jointbl
---- RESULTS
====
---- QUERY
# Make sure the new table can be queried and no data exists for this table.
select count(*) from jointbl_like
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# No error is thrown when IF NOT EXISTS is specified and the table already exists.
create table if not exists jointbl_like like functional.jointbl
---- RESULTS
====
---- QUERY
# IF NOT EXISTS also applies when the src table is the same as the new table.
create table if not exists jointbl_like like jointbl_like
---- RESULTS
====
---- QUERY
# Populate the table with the first five rows of functional.jointbl by test_id.
# The expected result line is ': 5', i.e. a row count with no partition prefix
# (compare the partitioned insert into alltypes_test earlier in this file).
insert overwrite table jointbl_like
select * from functional.jointbl order by test_id limit 5
---- RESULTS
: 5
====
---- QUERY
# Make sure we can read the data.
# NOTE(review): this select has no ORDER BY, yet the expected rows are listed in
# test_id order -- confirm the harness compares unordered queries order-insensitively.
select * from jointbl_like
---- RESULTS
1001,'Name1',94611,5000
1002,'Name2',94611,5000
1003,'Name3',94611,5000
1004,'Name4',94611,5000
1005,'Name5',94611,5000
---- TYPES
BIGINT, STRING, INT, INT
====
---- QUERY
# Test creating a partitioned Avro table without an Avro schema.
# The Avro schema is inferred from the column definitions.
# The column comments on c2, c7 and c9 state what the describe below is expected
# to report for them: c2 (smallint) as int, c7 (timestamp) as string, and c9
# (char(10)) unchanged.
create table no_avro_schema (
c1 tinyint,
c2 smallint comment 'becomes int',
c3 int,
c4 bigint,
c5 float,
c6 double,
c7 timestamp comment 'becomes string',
c8 string,
c9 char(10) comment 'preserved',
c10 varchar(20),
c11 decimal(10, 5),
c12 struct<f1:int,f2:string>,
c13 array<int>,
c14 map<string,string>)
partitioned by (year int, month int)
stored as avro
---- RESULTS
====
---- QUERY
# Expected: c1 (declared tinyint) and c2 (declared smallint) are reported as int and
# c7 (declared timestamp) as string; all other column types are reported as declared.
# Columns declared without a comment show 'from deserializer', columns with a comment
# keep it, and the partition columns year and month show an empty comment.
describe no_avro_schema
---- RESULTS
'c1','int','from deserializer'
'c2','int','becomes int'
'c3','int','from deserializer'
'c4','bigint','from deserializer'
'c5','float','from deserializer'
'c6','double','from deserializer'
'c7','string','becomes string'
'c8','string','from deserializer'
'c9','char(10)','preserved'
'c10','varchar(20)','from deserializer'
'c11','decimal(10,5)','from deserializer'
'c12','struct<\n f1:int,\n f2:string\n>','from deserializer'
'c13','array<int>','from deserializer'
'c14','map<string,string>','from deserializer'
'year','int',''
'month','int',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Test creating an Avro table without an Avro schema via CREATE TABLE LIKE (IMPALA-1813)
create table like_no_avro_schema like no_avro_schema stored as avro
---- RESULTS
====
---- QUERY
# The expected output is identical, row for row, to that of describe no_avro_schema.
describe like_no_avro_schema
---- RESULTS
'c1','int','from deserializer'
'c2','int','becomes int'
'c3','int','from deserializer'
'c4','bigint','from deserializer'
'c5','float','from deserializer'
'c6','double','from deserializer'
'c7','string','becomes string'
'c8','string','from deserializer'
'c9','char(10)','preserved'
'c10','varchar(20)','from deserializer'
'c11','decimal(10,5)','from deserializer'
'c12','struct<\n f1:int,\n f2:string\n>','from deserializer'
'c13','array<int>','from deserializer'
'c14','map<string,string>','from deserializer'
'year','int',''
'month','int',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Drop the copy first ...
drop table like_no_avro_schema
---- RESULTS
====
---- QUERY
# ... then the table it was created from.
drop table no_avro_schema
---- RESULTS
====
---- QUERY
# Test setting sort.columns when using create table like.
# This QUERY section holds two statements; the expected row comes from the
# DESCRIBE FORMATTED output, which has to contain the sort.columns property listing
# the columns in the order given in SORT BY.
# (VERIFY_IS_SUBSET presumably means only the listed rows need to appear in the
# output -- confirm against the test harness.)
create table sortbytest sort by (int_col, bool_col) like functional.alltypes;
describe formatted sortbytest;
---- RESULTS: VERIFY_IS_SUBSET
'','sort.columns ','int_col,bool_col '
---- TYPES
STRING,STRING,STRING
====
---- QUERY
# Test that sort.columns will be inherited from the source table.
# No SORT BY clause is given here; the clone is expected to report the same
# sort.columns value as sortbytest.
create table sortbytest_clone like sortbytest;
describe formatted sortbytest_clone;
---- RESULTS: VERIFY_IS_SUBSET
'','sort.columns ','int_col,bool_col '
---- TYPES
STRING,STRING,STRING
====
---- QUERY
# Test that sort.columns can be overridden in the query.
# The source table sortbytest was created with SORT BY (int_col, bool_col); the
# explicit SORT BY (id, string_col) given here is the value expected on the new table.
create table sortbytest_override sort by (id, string_col) like sortbytest;
describe formatted sortbytest_override;
---- RESULTS: VERIFY_IS_SUBSET
'','sort.columns ','id,string_col '
---- TYPES
STRING,STRING,STRING
====