mirror of
https://github.com/apache/impala.git
synced 2026-01-06 06:01:03 -05:00
Address several shortcomings with respect to the usability of Avro tables.
Addressed JIRAs: IMPALA-1947 and IMPALA-1813

New Feature:
Adds support for creating an Avro table without an explicit Avro schema with the following syntax:

    CREATE TABLE <table_name> column_defs STORED AS AVRO

Fixes and Improvements:
This patch fixes and unifies the logic for reconciling differences between an Avro table's Avro Schema and its column definitions. This reconciliation logic is executed during Impala's CREATE TABLE and when loading a table's metadata. Impala generally performs the schema reconciliation during table creation, but Hive does not. In many cases, Hive's CREATE TABLE stores the original column definitions in the HMS (in the StorageDescriptor) instead of the reconciled column definitions.

The reconciliation logic considers the field/column names and follows this conflict resolution policy, which is similar to Hive's:

- Mismatched number of columns -> Prefer Avro columns.
- Mismatched name/type -> Prefer Avro column, except: A CHAR/VARCHAR column definition maps to an Avro STRING, and is preserved as a CHAR/VARCHAR in the reconciled schema.

Behavior for TIMESTAMP:
A TIMESTAMP column definition maps to an Avro STRING and is presented as a STRING in the reconciled schema, because Avro has no binary TIMESTAMP representation. As a result, no Avro table may have a TIMESTAMP column (existing behavior).

Change-Id: I8457354568b6049b2dd2794b65fadc06e619d648
Reviewed-on: http://gerrit.cloudera.org:8080/550
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins
This commit is contained in:
committed by
Internal Jenkins
parent
927b8a4d39
commit
6f0b255c5a
@@ -674,6 +674,85 @@ describe avro_alltypes_part
|
||||
STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Test creating a partitioned Avro table without an Avro schema.
|
||||
# The Avro schema is inferred from the column definitions.
|
||||
create table no_avro_schema (
|
||||
c1 tinyint,
|
||||
c2 smallint comment 'becomes int',
|
||||
c3 int,
|
||||
c4 bigint,
|
||||
c5 float,
|
||||
c6 double,
|
||||
c7 timestamp comment 'becomes string',
|
||||
c8 string,
|
||||
c9 char(10) comment 'preserved',
|
||||
c10 varchar(20),
|
||||
c11 decimal(10, 5),
|
||||
c12 struct<f1:int,f2:string>,
|
||||
c13 array<int>,
|
||||
c14 map<string,string>)
|
||||
partitioned by (year int, month int)
|
||||
stored as avro
|
||||
---- RESULTS
|
||||
====
|
||||
---- QUERY
|
||||
describe no_avro_schema
|
||||
---- RESULTS
|
||||
'c1','int','from deserializer'
|
||||
'c2','int','becomes int'
|
||||
'c3','int','from deserializer'
|
||||
'c4','bigint','from deserializer'
|
||||
'c5','float','from deserializer'
|
||||
'c6','double','from deserializer'
|
||||
'c7','string','becomes string'
|
||||
'c8','string','from deserializer'
|
||||
'c9','char(10)','preserved'
|
||||
'c10','varchar(20)','from deserializer'
|
||||
'c11','decimal(10,5)','from deserializer'
|
||||
'c12','struct<\n f1:int,\n f2:string\n>','from deserializer'
|
||||
'c13','array<int>','from deserializer'
|
||||
'c14','map<string,string>','from deserializer'
|
||||
'year','int',''
|
||||
'month','int',''
|
||||
---- TYPES
|
||||
STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Test creating an Avro table without an Avro schema via CREATE TABLE LIKE (IMPALA-1813)
|
||||
create table like_no_avro_schema like no_avro_schema stored as avro
|
||||
---- RESULTS
|
||||
====
|
||||
---- QUERY
|
||||
describe like_no_avro_schema
|
||||
---- RESULTS
|
||||
'c1','int','from deserializer'
|
||||
'c2','int','becomes int'
|
||||
'c3','int','from deserializer'
|
||||
'c4','bigint','from deserializer'
|
||||
'c5','float','from deserializer'
|
||||
'c6','double','from deserializer'
|
||||
'c7','string','becomes string'
|
||||
'c8','string','from deserializer'
|
||||
'c9','char(10)','preserved'
|
||||
'c10','varchar(20)','from deserializer'
|
||||
'c11','decimal(10,5)','from deserializer'
|
||||
'c12','struct<\n f1:int,\n f2:string\n>','from deserializer'
|
||||
'c13','array<int>','from deserializer'
|
||||
'c14','map<string,string>','from deserializer'
|
||||
'year','int',''
|
||||
'month','int',''
|
||||
---- TYPES
|
||||
STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
drop table like_no_avro_schema
|
||||
---- RESULTS
|
||||
====
|
||||
---- QUERY
|
||||
drop table no_avro_schema
|
||||
---- RESULTS
|
||||
====
|
||||
---- QUERY
|
||||
drop table avro_alltypes_part
|
||||
---- RESULTS
|
||||
====
|
||||
|
||||
Reference in New Issue
Block a user