Files
impala/testdata/workloads/functional-query/queries/QueryTest/create.test
Alex Behm 3c650c3ba5 IMPALA-1104: Fixes for compute stats on Avro tables. Create Avro tables without col defs.
This patch addresses the following issues:
1. Allow creating Avro tables without col defs in Impala. Compute stats works on them.
2. Handle table creation with inconsistent col defs and Avro schema as follows:
   The table creation will succeed and ignore the col defs in favor of the Avro schema.
   A warning is issued that the col defs and the Avro schema are inconsistent.
   Compute stats works on such tables.

This patch does not address the issue of compute stats after Avro schema evolution.

Change-Id: Iea6b737d238d81491dc2097012ebc149a89d03ba
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/4182
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Alex Behm <alex.behm@cloudera.com>
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/4250
Tested-by: jenkins
2014-09-11 20:50:05 -07:00

652 lines
15 KiB
Plaintext

====
---- QUERY
drop table if exists ddl_test_db.temp_decimal_table
---- RESULTS
====
---- QUERY
create table ddl_test_db.temp_decimal_table like parquet
'/test-warehouse/schemas/decimal.parquet'
---- RESULTS
====
---- QUERY
describe ddl_test_db.temp_decimal_table
---- RESULTS
'd32','decimal(3,2)','inferred from: required fixed_len_byte_array(16) d32 (DECIMAL(3,2))'
'd11','decimal(1,1)','inferred from: required fixed_len_byte_array(16) d11 (DECIMAL(1,1))'
'd1015','decimal(15,10)','inferred from: required fixed_len_byte_array(16) d1015 (DECIMAL(15,10))'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
drop table if exists ddl_test_db.temp_decimal_table
---- RESULTS
====
---- QUERY
create table ddl_test_db.test_like_file_create like parquet
'/test-warehouse/schemas/zipcode_incomes.parquet'
---- RESULTS
====
---- QUERY
describe ddl_test_db.test_like_file_create
---- RESULTS
'id','string','inferred from: optional binary id'
'zip','string','inferred from: optional binary zip'
'description1','string','inferred from: optional binary description1'
'description2','string','inferred from: optional binary description2'
'income','int','inferred from: optional int32 income'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
drop table ddl_test_db.test_like_file_create
---- RESULTS
====
---- QUERY
create table if not exists ddl_test_db.test_like_file_create like parquet
'/test-warehouse/schemas/alltypestiny.parquet'
---- RESULTS
====
---- QUERY
describe ddl_test_db.test_like_file_create
---- RESULTS
'id','int','inferred from: optional int32 id'
'bool_col','boolean','inferred from: optional boolean bool_col'
'tinyint_col','int','inferred from: optional int32 tinyint_col'
'smallint_col','int','inferred from: optional int32 smallint_col'
'int_col','int','inferred from: optional int32 int_col'
'bigint_col','bigint','inferred from: optional int64 bigint_col'
'float_col','float','inferred from: optional float float_col'
'double_col','double','inferred from: optional double double_col'
'date_string_col','string','inferred from: optional binary date_string_col'
'string_col','string','inferred from: optional binary string_col'
'timestamp_col','timestamp','inferred from: optional int96 timestamp_col'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
create table if not exists ddl_test_db.test_like_file_create like parquet
'/test-warehouse/schemas/zipcode_incomes.parquet'
---- RESULTS
====
---- QUERY
# should not have changed since last statement was IF NOT EXISTS
describe ddl_test_db.test_like_file_create
---- RESULTS
'id','int','inferred from: optional int32 id'
'bool_col','boolean','inferred from: optional boolean bool_col'
'tinyint_col','int','inferred from: optional int32 tinyint_col'
'smallint_col','int','inferred from: optional int32 smallint_col'
'int_col','int','inferred from: optional int32 int_col'
'bigint_col','bigint','inferred from: optional int64 bigint_col'
'float_col','float','inferred from: optional float float_col'
'double_col','double','inferred from: optional double double_col'
'date_string_col','string','inferred from: optional binary date_string_col'
'string_col','string','inferred from: optional binary string_col'
'timestamp_col','timestamp','inferred from: optional int96 timestamp_col'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
drop table ddl_test_db.test_like_file_create
---- RESULTS
====
---- QUERY
# It should show up now
show databases like 'ddl_test_db'
---- RESULTS
'ddl_test_db'
---- TYPES
STRING
====
---- QUERY
# Make sure creating a database with the same name doesn't throw an error when
# IF NOT EXISTS is specified.
create database if not exists ddl_test_db
---- RESULTS
====
---- QUERY
show tables in ddl_test_db
---- RESULTS
====
---- QUERY
create table ddl_test_db.testtbl(i int, s string COMMENT 'String col') STORED AS TEXTFILE
---- RESULTS
====
---- QUERY
# Make sure creating a table with the same name doesn't throw an error when
# IF NOT EXISTS is specified.
create table if not exists ddl_test_db.testtbl(i int, s string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
ESCAPED BY '\\'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
---- RESULTS
====
---- QUERY
show tables in ddl_test_db
---- RESULTS
'testtbl'
---- TYPES
STRING
====
---- QUERY
describe ddl_test_db.testtbl
---- RESULTS
'i','int',''
's','string','String col'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
insert overwrite table ddl_test_db.testtbl SELECT 1, 'Hi'
from functional.alltypes limit 10
---- RESULTS
: 10
====
---- QUERY
select * from ddl_test_db.testtbl
---- RESULTS
1,'Hi'
1,'Hi'
1,'Hi'
1,'Hi'
1,'Hi'
1,'Hi'
1,'Hi'
1,'Hi'
1,'Hi'
1,'Hi'
---- TYPES
INT, STRING
====
---- QUERY
create table ddl_test_db.testtbl_part(i int, s string) PARTITIONED BY (id int comment 'C')
---- RESULTS
====
---- QUERY
# Partition columns are displayed as part of DESCRIBE <table>
describe ddl_test_db.testtbl_part
---- RESULTS
'i','int',''
's','string',''
'id','int','C'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
insert overwrite table ddl_test_db.testtbl_part partition(id=1)
select 10, 'Ten' from functional.alltypes limit 1
---- RESULTS
id=1/: 1
====
---- QUERY
insert overwrite table ddl_test_db.testtbl_part partition(id=2)
select 20, 'Twenty' from functional.alltypes limit 2
---- RESULTS
id=2/: 2
====
---- QUERY
select * from ddl_test_db.testtbl_part
---- RESULTS
10,'Ten',1
20,'Twenty',2
20,'Twenty',2
---- TYPES
INT, STRING, INT
====
---- QUERY
select * from ddl_test_db.testtbl_part where id = 1
---- RESULTS
10,'Ten',1
---- TYPES
INT, STRING, INT
====
---- QUERY
use ddl_test_db
---- RESULTS
====
---- QUERY
show tables
---- RESULTS
'testtbl'
'testtbl_part'
---- TYPES
STRING
====
---- QUERY
# Make sure we create the table in the proper database after a "use"
create table testtbl2(f float, d double) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
---- RESULTS
====
---- QUERY
show tables
---- RESULTS
'testtbl'
'testtbl2'
'testtbl_part'
---- TYPES
STRING
====
---- QUERY
drop table testtbl2
---- RESULTS
====
---- QUERY
show tables
---- RESULTS
'testtbl'
'testtbl_part'
---- TYPES
STRING
====
---- QUERY
# CREATE TABLE LIKE on partitioned table
create table alltypes_test like functional_seq_snap.alltypes
stored as parquet
---- RESULTS
====
---- QUERY
# CREATE TABLE LIKE on a view
create table like_view like functional.view_view
---- RESULTS
====
---- QUERY
describe like_view
---- RESULTS
'id','int',''
'bool_col','boolean',''
'tinyint_col','tinyint',''
'smallint_col','smallint',''
'int_col','int',''
'bigint_col','bigint',''
'float_col','float',''
'double_col','double',''
'date_string_col','string',''
'string_col','string',''
'timestamp_col','timestamp',''
'year','int',''
'month','int',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
show table stats like_view
---- LABELS
#ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
---- RESULTS
-1,0,'0B','NOT CACHED','TEXT'
---- TYPES
BIGINT, BIGINT, STRING, STRING, STRING
====
---- QUERY
drop table like_view
---- RESULTS
====
---- QUERY
create table like_view_parquet like functional.view_view stored as parquet
---- RESULTS
====
---- QUERY
show table stats like_view_parquet
---- LABELS
#ROWS, #FILES, SIZE, BYTES CACHED, FORMAT
---- RESULTS
-1,0,'0B','NOT CACHED','PARQUET'
---- TYPES
BIGINT, BIGINT, STRING, STRING, STRING
====
---- QUERY
drop table like_view_parquet
---- RESULTS
====
---- QUERY
# Make sure no data exists for this table
select count(*) from alltypes_test
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# Should be able to insert into this table
insert overwrite table alltypes_test
partition (year=2009, month=4)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes where year=2009 and month=4
---- RESULTS
year=2009/month=4/: 300
====
---- QUERY
# Make sure we can read the new data.
select count(*) from alltypes_test
---- RESULTS
300
---- TYPES
BIGINT
====
---- QUERY
# This should copy the file format from the source table (rc)
create external table jointbl_like like functional_rc_gzip.jointbl
location '/test-warehouse/jointbl_rc_gzip'
---- RESULTS
====
---- QUERY
# should get some results back
select * from jointbl_like order by test_id limit 3
---- RESULTS
1001,'Name1',94611,5000
1002,'Name2',94611,5000
1003,'Name3',94611,5000
---- TYPES
BIGINT, STRING, INT, INT
====
---- QUERY
# CREATE TABLE LIKE on unpartitioned table.
create table testtbl_like like testtbl
---- RESULTS
====
---- QUERY
# Make sure the new table can be queried and no data exists for this table.
select count(*) from testtbl_like
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# No error is thrown when IF NOT EXISTS is specified and the table already exists.
create table if not exists testtbl_like like testtbl
---- RESULTS
====
---- QUERY
# IF NOT EXISTS also applies when the src table is the same as the new table.
create table if not exists testtbl_like like testtbl_like
---- RESULTS
====
---- QUERY
insert overwrite table testtbl_like
select * from testtbl limit 5
---- RESULTS
: 5
====
---- QUERY
# Make sure we can read the data.
select * from testtbl_like
---- RESULTS
1,'Hi'
1,'Hi'
1,'Hi'
1,'Hi'
1,'Hi'
---- TYPES
INT, STRING
====
---- QUERY
# Ensure that a table can be created using CTAS
create table ctas_join stored as parquet as
select j.*, a.int_col, 1*2
from functional.jointbl j join functional_seq_snap.alltypes a
on (j.alltypes_id=a.id)
---- RESULTS
'Inserted 12 row(s)'
---- TYPES
STRING
====
---- QUERY
describe ctas_join
---- RESULTS
'test_id','bigint',''
'test_name','string',''
'test_zip','int',''
'alltypes_id','int',''
'int_col','int',''
'_c2','smallint',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
select * from ctas_join
---- RESULTS
1001,'Name1',94611,5000,0,2
1002,'Name2',94611,5000,0,2
1003,'Name3',94611,5000,0,2
1004,'Name4',94611,5000,0,2
1005,'Name5',94611,5000,0,2
1106,'Name6',94612,5000,0,2
1006,'Name16',94612,5000,0,2
1006,'Name6',94616,5000,0,2
1106,'Name16',94612,5000,0,2
1106,'Name6',94616,5000,0,2
1006,'Name16',94616,5000,0,2
1106,'Name16',94616,5000,0,2
---- TYPES
BIGINT, STRING, INT, INT, INT, SMALLINT
====
---- QUERY
# Since the table already exists, the second CTAS should be a no-op
create table if not exists ctas_join stored as parquet as
select j.*, a.int_col, 1*2
from functional.jointbl j join functional_seq_snap.alltypes a
on (j.alltypes_id=a.id) limit 1
---- RESULTS
'Inserted 0 row(s)'
---- TYPES
STRING
====
---- QUERY
select * from ctas_join
---- RESULTS
1001,'Name1',94611,5000,0,2
1002,'Name2',94611,5000,0,2
1003,'Name3',94611,5000,0,2
1004,'Name4',94611,5000,0,2
1005,'Name5',94611,5000,0,2
1106,'Name6',94612,5000,0,2
1006,'Name16',94612,5000,0,2
1006,'Name6',94616,5000,0,2
1106,'Name16',94612,5000,0,2
1106,'Name6',94616,5000,0,2
1006,'Name16',94616,5000,0,2
1106,'Name16',94616,5000,0,2
---- TYPES
BIGINT, STRING, INT, INT, INT, SMALLINT
====
---- QUERY
# Validate CTAS with LIMIT 0
create table if not exists ctas_join_limit0 stored as textfile as
select * from functional.jointbl limit 0
---- RESULTS
'Inserted 0 row(s)'
---- TYPES
STRING
====
---- QUERY
describe ctas_join_limit0
---- RESULTS
'test_id','bigint',''
'test_name','string',''
'test_zip','int',''
'alltypes_id','int',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
select * from ctas_join_limit0
---- RESULTS
---- TYPES
BIGINT, STRING, INT, INT
====
---- QUERY
# Validate CTAS with LIMIT 0
create table if not exists ctas_join_limit0 stored as textfile as
select * from functional.jointbl limit 0
---- RESULTS
'Inserted 0 row(s)'
---- TYPES
STRING
====
---- QUERY
select * from ctas_join_limit0
---- RESULTS
---- TYPES
BIGINT, STRING, INT, INT
====
---- QUERY
create table if not exists ctas_join_limit0 stored as textfile as
select * from functional.jointbl limit 4
---- RESULTS
'Inserted 0 row(s)'
---- TYPES
STRING
====
---- QUERY
select * from ctas_join_limit0
---- RESULTS
---- TYPES
BIGINT, STRING, INT, INT
====
---- QUERY
create table allcomplextypes_clone like functional.allcomplextypes
stored as parquet
---- RESULTS
====
---- QUERY
describe allcomplextypes_clone
---- TYPES
STRING, STRING, STRING
---- RESULTS
'id','int',''
'int_array_col','array<int>',''
'array_array_col','array<array<int>>',''
'map_array_col','array<map<string,int>>',''
'struct_array_col','array<struct<f1:bigint,f2:string>>',''
'int_map_col','map<string,int>',''
'array_map_col','map<string,array<int>>',''
'map_map_col','map<string,map<string,int>>',''
'struct_map_col','map<string,struct<f1:bigint,f2:string>>',''
'int_struct_col','struct<f1:int,f2:int>',''
'complex_struct_col','struct<f1:int,f2:array<int>,f3:map<string,int>>',''
'nested_struct_col','struct<f1:int,f2:struct<f11:bigint,f12:struct<f21:bigint>>>',''
'complex_nested_struct_col','struct<f1:int,f2:array<struct<f11:bigint,f12:map<string,struct<f21:bigint>>>>>',''
'year','int',''
'month','int',''
====
---- QUERY
# Test creating an unpartitioned Avro table without column definitions.
create table avro_alltypes_nopart
with serdeproperties
('avro.schema.url'='hdfs:///test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro
---- RESULTS
====
---- QUERY
describe avro_alltypes_nopart
---- TYPES
STRING, STRING, STRING
---- RESULTS
'bigint_col','bigint','from deserializer'
'bool_col','boolean','from deserializer'
'date_string_col','string','from deserializer'
'double_col','double','from deserializer'
'float_col','float','from deserializer'
'id','int','from deserializer'
'int_col','int','from deserializer'
'smallint_col','int','from deserializer'
'string_col','string','from deserializer'
'timestamp_col','string','from deserializer'
'tinyint_col','int','from deserializer'
====
---- QUERY
# Test creating a partitioned Avro table without column definitions.
create table avro_alltypes_part
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='hdfs:///test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro
---- RESULTS
====
---- QUERY
describe avro_alltypes_part
---- TYPES
STRING, STRING, STRING
---- RESULTS
'bigint_col','bigint','from deserializer'
'bool_col','boolean','from deserializer'
'date_string_col','string','from deserializer'
'double_col','double','from deserializer'
'float_col','float','from deserializer'
'id','int','from deserializer'
'int_col','int','from deserializer'
'month','int',''
'smallint_col','int','from deserializer'
'string_col','string','from deserializer'
'timestamp_col','string','from deserializer'
'tinyint_col','int','from deserializer'
'year','int',''
====
---- QUERY
drop table avro_alltypes_part
---- RESULTS
====
---- QUERY
drop table avro_alltypes_nopart
---- RESULTS
====
---- QUERY
drop table allcomplextypes_clone
---- RESULTS
====
---- QUERY
drop table ctas_join
---- RESULTS
====
---- QUERY
drop table ctas_join_limit0
---- RESULTS
====
---- QUERY
drop table testtbl
---- RESULTS
====
---- QUERY
drop table testtbl_part
---- RESULTS
====
---- QUERY
drop table alltypes_test
---- RESULTS
====
---- QUERY
drop table jointbl_like
---- RESULTS
====
---- QUERY
drop table testtbl_like
---- RESULTS
====
---- QUERY
show tables
---- RESULTS
---- TYPES
STRING
====
---- QUERY
drop table if exists non_existent_db.tbl
---- RESULTS
====
---- QUERY
# Need to switch databases before dropping
use default;
drop database ddl_test_db
---- RESULTS
====
---- QUERY
# Should be gone now
show databases like 'ddl_test_db'
---- RESULTS
---- TYPES
STRING
====