Files
impala/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
Lars Volker 0e886618e2 IMPALA-3776: fix 'describe formatted' for Avro tables
For Avro tables, the column information in the underlying database of the
Hive metastore can differ from what is specified in the Avro
schema. HIVE-6308 aimed to improve upon this, but for older tables the
two don't necessarily align.

There are two possible cases:

1) Hive's underlying database contains a column which is not present in
the Avro schema file. In this case we encounter a NullPointerException
in DescribeResultFactory.java#L189 when trying to look up the column in
the internal table object.

2) The Avro schema contains a column that is not present in the
underlying database. In this case the column is missing from the output
of describe formatted.

In addition to the automatic tests I verified this manually by creating
an Avro table with an external schema file in Hive. This populated the
underlying database with the column information. I then either removed
a column from the Avro schema file (case 1) or cleared the column
information from the "COLUMNS_V2" table in the underlying database
(case 2) and verified that the change fixed both cases.

Change-Id: Ieb69d3678e662465d40aee80ba23132ea13871a0
Reviewed-on: http://gerrit.cloudera.org:8080/4126
Reviewed-by: Lars Volker <lv@cloudera.com>
Tested-by: Internal Jenkins
Reviewed-by: Jim Apple <jbapple@cloudera.com>
2016-08-26 17:20:10 +00:00

139 lines
3.9 KiB
Plaintext

====
---- QUERY
# Stale-schema scenario, first step: create the table with the default file format.
# The following sections switch it to Avro via ALTER TABLE, query it while the
# metadata is still stale (expected to fail with a 'Missing Avro schema' error),
# invalidate the metadata, and query again once the Avro schema has been set on
# the next metadata load.
CREATE EXTERNAL TABLE alltypesagg_staleschema (
  id INT,
  bool_col BOOLEAN,
  tinyint_col INT,
  smallint_col INT,
  int_col INT,
  bigint_col BIGINT,
  float_col FLOAT,
  double_col DOUBLE,
  date_string_col STRING,
  string_col STRING,
  timestamp_col STRING
)
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/alltypesaggmultifilesnopart_avro_snap'
TBLPROPERTIES (
  'avro.schema.url' = '$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypesaggmultifilesnopart.json'
)
====
---- QUERY
# Switch the table created above to the Avro file format. The next query is
# expected to fail until the table's metadata has been invalidated.
ALTER TABLE alltypesagg_staleschema SET FILEFORMAT AVRO
====
---- QUERY
# Querying before the metadata is invalidated must fail with the error below.
SELECT COUNT(*) FROM alltypesagg_staleschema
---- CATCH
Missing Avro schema in scan node. This could be due to stale metadata.
====
---- QUERY
# Drop the stale metadata so that the following query reloads the table.
INVALIDATE METADATA alltypesagg_staleschema
====
---- QUERY
# After the invalidation the same count query has to succeed.
SELECT COUNT(*) FROM alltypesagg_staleschema
---- RESULTS
11000
---- TYPES
bigint
====
---- QUERY
# IMPALA-3092: start from an Avro table that declares only the columns a and b,
# then append further columns through ALTER TABLE ... ADD COLUMNS. Querying the
# table afterwards should work.
CREATE EXTERNAL TABLE avro_alter_table_add_new_column (
  a STRING,
  b STRING
)
STORED AS AVRO
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/tinytable_avro';
ALTER TABLE avro_alter_table_add_new_column ADD COLUMNS (
  bool_col BOOLEAN,
  int_col INT,
  bigint_col BIGINT,
  float_col FLOAT,
  double_col DOUBLE,
  timestamp_col TIMESTAMP,
  decimal_col DECIMAL(2,0),
  string_col STRING
)
====
---- QUERY
# The eight columns added by ALTER TABLE must come back as NULL in every row,
# while a and b keep their stored values. Note that the TYPES row expects string
# in the position of timestamp_col.
SELECT * FROM avro_alter_table_add_new_column
---- RESULTS
'aaaaaaa','bbbbbbb',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL'
'ccccc','dddd',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL'
'eeeeeeee','f',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL'
---- TYPES
string, string, boolean, int, bigint, float, double, string, decimal, string
====
---- QUERY
# IMPALA-3776: Create an Avro table, add a column to the Avro schema and make sure
# describe and describe formatted still work.
# Setup: the table is created with the single column old_col; the Avro schema
# literal set afterwards lists old_col and new_col, so the schema contains a
# column that was not part of the CREATE TABLE statement. The REFRESH precedes
# the DESCRIBE checks in the following sections.
CREATE TABLE avro_alter_schema_add_new_column (old_col string) STORED AS AVRO;
ALTER TABLE avro_alter_schema_add_new_column SET TBLPROPERTIES (
'avro.schema.literal'=' {
"namespace": "org.apache.test",
"name": "avro_alter_schema_add_new_column",
"type": "record",
"fields": [
{ "name":"old_col", "type":"string" },
{ "name":"new_col", "type":"string" }
]
}'
);
REFRESH avro_alter_schema_add_new_column;
====
---- QUERY
# The new column now has to show up in describe: both old_col and new_col (the
# latter only present in the Avro schema literal) are expected, each as string.
DESCRIBE avro_alter_schema_add_new_column;
---- TYPES
string,string,string
---- RESULTS
'old_col','string','from deserializer'
'new_col','string','from deserializer'
====
---- QUERY
# The new column now has to show up in describe formatted.
# Only the two column rows are listed; with VERIFY_IS_SUBSET the remaining
# DESCRIBE FORMATTED output is presumably not compared. Unlike plain DESCRIBE,
# the expected type here is spelled in upper case ('STRING').
DESCRIBE FORMATTED avro_alter_schema_add_new_column;
---- TYPES
string,string,string
---- RESULTS: VERIFY_IS_SUBSET
'old_col','STRING','from deserializer'
'new_col','STRING','from deserializer'
====
---- QUERY
# IMPALA-3776: Create an Avro table, remove a column from the Avro schema and make sure
# describe and describe formatted still work.
# Setup: the table is created with col1 and col2; the Avro schema literal set
# afterwards lists only col1, so col2 was declared in CREATE TABLE but is absent
# from the Avro schema. The REFRESH precedes the DESCRIBE checks in the
# following sections.
CREATE TABLE avro_alter_schema_remove_column (col1 string, col2 string) STORED AS AVRO;
ALTER TABLE avro_alter_schema_remove_column SET TBLPROPERTIES (
'avro.schema.literal'=' {
"namespace": "org.apache.test",
"name": "avro_alter_schema_remove_column",
"type": "record",
"fields": [
{ "name":"col1", "type":"string" }
]
}'
);
REFRESH avro_alter_schema_remove_column;
====
---- QUERY
# The removed column (col2) must no longer show up in describe; only col1, which
# is still part of the Avro schema literal, is expected.
DESCRIBE avro_alter_schema_remove_column;
---- TYPES
string,string,string
---- RESULTS
'col1','string','from deserializer'
====
---- QUERY
# describe formatted must still work after col2 was removed from the Avro schema
# and has to list col1.
# NOTE(review): with VERIFY_IS_SUBSET this presumably only checks that the col1
# row is present; it does not by itself prove that col2 is absent - confirm.
DESCRIBE FORMATTED avro_alter_schema_remove_column;
---- TYPES
string,string,string
---- RESULTS: VERIFY_IS_SUBSET
'col1','STRING','from deserializer'
====