Files
impala/testdata/avro_schema_resolution/create_table.sql
Lenni Kuff fc7733d530 Fix resolution of mismatched column names that come from the deserializer (ex. Avro tables)
Fixes a bug (regression) where the catalog server was not properly resolving column
names when a table's column definition did not match its Avro schema definition.
The expected behavior in this case is that the Avro schema definition should be
used instead of the table columns. We had no test tables that were mismatched so
this wasn't caught.
This loading of the schema and columns happens when a table's metadata is loaded, so
the fix is to just add a toThrift() to Column and not reference
metastore.getSd().getCols() directly since it might be the "wrong" set of columns.

Change-Id: I341a3a8834f5748f90c246d2093ddb983ecfdd4f
Reviewed-on: http://gerrit.ent.cloudera.com:8080/770
Reviewed-by: Lenni Kuff <lskuff@cloudera.com>
Tested-by: Lenni Kuff <lskuff@cloudera.com>
2014-01-08 10:53:44 -08:00

30 lines
1.4 KiB
SQL

-- Work inside the functional_avro_snap database for the rest of this script.
USE functional_avro_snap;
-- Remove any previous definition of the test table so the script can be re-run.
DROP TABLE IF EXISTS schema_resolution_test;
-- Create a table whose declared columns deliberately disagree with its Avro schema.
-- The Avro schema is specified in SERDEPROPERTIES instead of TBLPROPERTIES to validate
-- IMPALA-538. The table is declared with a different column definition (col1, col2)
-- than the eight fields defined in the Avro schema below, for testing mismatched
-- table/deserializer schemas. Every Avro field carries a default value; string2 and
-- string3 are unions with null.
-- The table is EXTERNAL and its LOCATION is expanded from the
-- hive.metastore.warehouse.dir hiveconf variable.
CREATE EXTERNAL TABLE schema_resolution_test (col1 string, col2 string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
WITH SERDEPROPERTIES ('avro.schema.literal'='{
"name": "a",
"type": "record",
"fields": [
{"name":"boolean1", "type":"boolean", "default": true},
{"name":"int1", "type":"int", "default": 1},
{"name":"long1", "type":"long", "default": 1},
{"name":"float1", "type":"float", "default": 1.0},
{"name":"double1", "type":"double", "default": 1.0},
{"name":"string1", "type":"string", "default": "default string"},
{"name":"string2", "type": ["string", "null"], "default": ""},
{"name":"string3", "type": ["null", "string"], "default": null}
]}')
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
LOCATION '${hiveconf:hive.metastore.warehouse.dir}/avro_schema_resolution_test/';
-- Populate the table from two local Avro files. The first load uses OVERWRITE to
-- replace any existing table contents; the second load adds to them.
-- NOTE(review): the file paths are relative, so this presumably has to be run from the
-- directory that contains records1.avro and records2.avro -- confirm.
LOAD DATA LOCAL INPATH 'records1.avro' OVERWRITE INTO TABLE schema_resolution_test;
LOAD DATA LOCAL INPATH 'records2.avro' INTO TABLE schema_resolution_test;