Files
impala/testdata/workloads/functional-query/queries/QueryTest/create-table-as-select.test
Lars Volker 1ada9dac88 IMPALA-4166: Add SORT BY sql clause
This change adds support for adding SORT BY (...) clauses to CREATE
TABLE and ALTER TABLE statements. Examples are:

CREATE TABLE t (i INT, j INT, k INT) PARTITIONED BY (l INT) SORT BY (i, j);
CREATE TABLE t SORT BY (int_col,id) LIKE u;
CREATE TABLE t LIKE PARQUET '/foo' SORT BY (id,zip);

ALTER TABLE t SORT BY (int_col,id);
ALTER TABLE t SORT BY ();

Sort columns can only be specified for Hdfs tables and effectiveness may
vary based on storage type; for example TEXT tables will not see
improved compression. The SORT BY clause must not contain clustering
columns. The columns in the SORT BY clause are stored in the
'sort.columns' table property and will result in an additional SORT node
being added to the plan before the final table sink. Specifying sort
columns also enables clustering during inserts, so the SORT node will
contain all partitioning columns first, followed by the sort columns. We
do this because sort columns add a SORT node to the plan and adding the
clustering columns to the SORT node is cheap.

Sort columns supersede the sortby() hint, which we will remove in a
subsequent change (IMPALA-5144). Until then, it is possible to specify
sort columns using both ways at the same time and the column lists
will be concatenated.

Change-Id: I08834f38a941786ab45a4381c2732d929a934f75
Reviewed-on: http://gerrit.cloudera.org:8080/6495
Reviewed-by: Lars Volker <lv@cloudera.com>
Tested-by: Impala Public Jenkins
2017-05-12 15:43:30 +00:00

202 lines
5.3 KiB
Plaintext

====
---- QUERY
# Ensure that a table can be created using CTAS
create table ctas_join stored as parquet as
select j.*, a.int_col, 1*2
from functional.jointbl j join functional_seq_snap.alltypes a
on (j.alltypes_id=a.id)
---- RESULTS
'Inserted 12 row(s)'
---- TYPES
STRING
====
---- QUERY
describe ctas_join
---- RESULTS
'test_id','bigint',''
'test_name','string',''
'test_zip','int',''
'alltypes_id','int',''
'int_col','int',''
'_c2','smallint',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
select * from ctas_join
---- RESULTS
1001,'Name1',94611,5000,0,2
1002,'Name2',94611,5000,0,2
1003,'Name3',94611,5000,0,2
1004,'Name4',94611,5000,0,2
1005,'Name5',94611,5000,0,2
1106,'Name6',94612,5000,0,2
1006,'Name16',94612,5000,0,2
1006,'Name6',94616,5000,0,2
1106,'Name16',94612,5000,0,2
1106,'Name6',94616,5000,0,2
1006,'Name16',94616,5000,0,2
1106,'Name16',94616,5000,0,2
---- TYPES
BIGINT, STRING, INT, INT, INT, SMALLINT
====
---- QUERY
# Since the table already exists, the second CTAS should be a no-op
create table if not exists ctas_join stored as parquet as
select j.*, a.int_col, 1*2
from functional.jointbl j join functional_seq_snap.alltypes a
on (j.alltypes_id=a.id) limit 1
---- RESULTS
'Inserted 0 row(s)'
---- TYPES
STRING
====
---- QUERY
select * from ctas_join
---- RESULTS
1001,'Name1',94611,5000,0,2
1002,'Name2',94611,5000,0,2
1003,'Name3',94611,5000,0,2
1004,'Name4',94611,5000,0,2
1005,'Name5',94611,5000,0,2
1106,'Name6',94612,5000,0,2
1006,'Name16',94612,5000,0,2
1006,'Name6',94616,5000,0,2
1106,'Name16',94612,5000,0,2
1106,'Name6',94616,5000,0,2
1006,'Name16',94616,5000,0,2
1106,'Name16',94616,5000,0,2
---- TYPES
BIGINT, STRING, INT, INT, INT, SMALLINT
====
---- QUERY
# Validate CTAS with LIMIT 0
create table if not exists ctas_join_limit0 stored as textfile as
select * from functional.jointbl limit 0
---- RESULTS
'Inserted 0 row(s)'
---- TYPES
STRING
====
---- QUERY
describe ctas_join_limit0
---- RESULTS
'test_id','bigint',''
'test_name','string',''
'test_zip','int',''
'alltypes_id','int',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
select * from ctas_join_limit0
---- RESULTS
---- TYPES
BIGINT, STRING, INT, INT
====
---- QUERY
# Validate CTAS with LIMIT 0 and IF NOT EXISTS when the target
# table already exists.
create table if not exists ctas_join_limit0 stored as textfile as
select * from functional.jointbl limit 0
---- RESULTS
'Inserted 0 row(s)'
---- TYPES
STRING
====
---- QUERY
select * from ctas_join_limit0
---- RESULTS
---- TYPES
BIGINT, STRING, INT, INT
====
---- QUERY
create table if not exists ctas_join_limit0 stored as textfile as
select * from functional.jointbl limit 4
---- RESULTS
'Inserted 0 row(s)'
---- TYPES
STRING
====
---- QUERY
select * from ctas_join_limit0
---- RESULTS
---- TYPES
BIGINT, STRING, INT, INT
====
---- QUERY
# IMPALA-2203: Test CTAS from a select statement that has outer-joined inline views with
# constant exprs in the select list. The non-matches of the outer join should be NULL.
create table ctas_impala_2203 as
select a.id, a.bool_col, a.tinyint_col, a.smallint_col, a.int_col, a.bigint_col,
b.float_col, b.double_col, b.date_string_col, b.string_col, b.timestamp_col
from
(select id, false bool_col, 1 tinyint_col, 2 smallint_col, 3 int_col, 4 bigint_col
from functional.alltypestiny where id between 0 and 2) a
full outer join
(select id, 5 float_col, 6 double_col, "s1" date_string_col, "s2" string_col,
cast("2009-02-06 00:01:00" as timestamp) timestamp_col
from functional.alltypestiny where id between 1 and 3) b
on (a.id = b.id)
---- RESULTS
'Inserted 4 row(s)'
---- TYPES
STRING
====
---- QUERY
select * from ctas_impala_2203
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,false,1,2,3,4,NULL,NULL,'NULL','NULL',NULL
1,false,1,2,3,4,5,6,'s1','s2',2009-02-06 00:01:00
2,false,1,2,3,4,5,6,'s1','s2',2009-02-06 00:01:00
NULL,NULL,NULL,NULL,NULL,NULL,5,6,'s1','s2',2009-02-06 00:01:00
---- TYPES
INT, BOOLEAN, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, STRING, STRING, TIMESTAMP
====
---- QUERY
# Test creating a partitioned Parquet table with CTAS.
create table $DATABASE.ctas_part_alltypestiny
partitioned by (year,month) stored as parquet
as select * from functional.alltypestiny
---- RESULTS
'Inserted 8 row(s)'
====
---- QUERY
select id, float_col, timestamp_col, string_col, year, month
from $DATABASE.ctas_part_alltypestiny
---- RESULTS
0,0,2009-01-01 00:00:00,'0',2009,1
1,1.100000023841858,2009-01-01 00:01:00,'1',2009,1
2,0,2009-02-01 00:00:00,'0',2009,2
3,1.100000023841858,2009-02-01 00:01:00,'1',2009,2
4,0,2009-03-01 00:00:00,'0',2009,3
5,1.100000023841858,2009-03-01 00:01:00,'1',2009,3
6,0,2009-04-01 00:00:00,'0',2009,4
7,1.100000023841858,2009-04-01 00:01:00,'1',2009,4
---- TYPES
INT,FLOAT,TIMESTAMP,STRING,INT,INT
====
---- QUERY
# IMPALA-2711: Make sure no memory leak from Rand().
create table $DATABASE.rand_ctas as select rand() from functional.alltypes;
---- RESULTS
'Inserted 7300 row(s)'
---- ERRORS
====
---- QUERY
# IMPALA-5145: Do not constant fold null in CastExprs
create table if not exists cast_null_as_int as (select cast(null as int) c);
---- RESULTS
'Inserted 1 row(s)'
====
---- QUERY
# Test adding sort.columns when creating a table from a select query.
create table sortbytest sort by (int_col, bool_col) as
select * from functional.alltypessmall;
describe formatted sortbytest;
---- RESULTS: VERIFY_IS_SUBSET
'','sort.columns ','int_col,bool_col '
---- TYPES
STRING,STRING,STRING
====