mirror of
https://github.com/apache/impala.git
synced 2026-01-02 12:00:33 -05:00
For Avro tables the column information in the underlying database of the Hive metastore can be different from what is specified in the avro schema. HIVE-6308 aimed to improve upon this, but for older tables the two don't necessarily align. There are two possible cases: 1) Hive's underlying database contains a column which is not present in the Avro schema file. In this case we encounter a NullPointerException in DescribeResultFactory.java#L189 when trying to look up the column in the internal table object. 2) The Avro schema contains a column, which is not present in the underlying database. In this case the column will not be displayed in describe formatted. In addition to the automatic tests I verified this manually by creating an Avro table with an external schema file in Hive. This populated the underlying database with the column information. I then either removed a column from the Avro schema file (case 1) or cleared the column information from the "COLUMNS_V2" table in the underlying database (case 2) and verified that the change fixed both cases. Change-Id: Ieb69d3678e662465d40aee80ba23132ea13871a0 Reviewed-on: http://gerrit.cloudera.org:8080/4126 Reviewed-by: Lars Volker <lv@cloudera.com> Tested-by: Internal Jenkins Reviewed-by: Jim Apple <jbapple@cloudera.com>
139 lines
3.9 KiB
Plaintext
139 lines
3.9 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Create a table with the default file format and later switch it to Avro with
|
|
# ALTER TABLE. The next query runs with stale metadata and is expected to fail (see
|
|
# the CATCH section below). Invalidating metadata should cause the Avro schema to be
|
|
# properly set upon the next metadata load.
|
|
CREATE EXTERNAL TABLE alltypesagg_staleschema (
|
|
id INT,
|
|
bool_col BOOLEAN,
|
|
tinyint_col INT,
|
|
smallint_col INT,
|
|
int_col INT,
|
|
bigint_col BIGINT,
|
|
float_col FLOAT,
|
|
double_col DOUBLE,
|
|
date_string_col STRING,
|
|
string_col STRING,
|
|
timestamp_col STRING
|
|
)
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/alltypesaggmultifilesnopart_avro_snap'
|
|
TBLPROPERTIES ('avro.schema.url'= '$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypesaggmultifilesnopart.json')
|
|
====
|
|
---- QUERY
|
|
# Switch the table's file format to Avro.
ALTER TABLE alltypesagg_staleschema SET FILEFORMAT AVRO
|
|
====
|
|
---- QUERY
|
|
# Expected to fail; the error text is given in the CATCH section.
SELECT COUNT(*) FROM alltypesagg_staleschema
|
|
---- CATCH
|
|
Missing Avro schema in scan node. This could be due to stale metadata.
|
|
====
|
|
---- QUERY
|
|
# Invalidate the table's metadata so that it is loaded afresh by the next query.
INVALIDATE METADATA alltypesagg_staleschema
|
|
====
|
|
---- QUERY
|
|
# The same count query must now succeed.
SELECT COUNT(*) FROM alltypesagg_staleschema
|
|
---- RESULTS
|
|
11000
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3092. Create an Avro table from explicit column definitions (no Avro schema
|
|
# property is given) and add columns via ALTER TABLE. Querying the table should work.
|
|
CREATE EXTERNAL TABLE avro_alter_table_add_new_column (
|
|
a string,
|
|
b string)
|
|
STORED AS AVRO
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/tinytable_avro';
|
|
|
|
# Append eight more columns of assorted types to the two original string columns.
ALTER TABLE avro_alter_table_add_new_column ADD COLUMNS (
|
|
bool_col boolean,
|
|
int_col int,
|
|
bigint_col bigint,
|
|
float_col float,
|
|
double_col double,
|
|
timestamp_col timestamp,
|
|
decimal_col decimal(2,0),
|
|
string_col string)
|
|
====
|
|
---- QUERY
|
|
# Each of the newly added columns is expected to come back as NULL.
|
|
SELECT * FROM avro_alter_table_add_new_column
|
|
---- RESULTS
|
|
'aaaaaaa','bbbbbbb',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL'
|
|
'ccccc','dddd',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL'
|
|
'eeeeeeee','f',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL'
|
|
---- TYPES
|
|
string, string, boolean, int, bigint, float, double, string, decimal, string
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3776: Create an Avro table, add a column to the Avro schema and make sure
|
|
# describe and describe formatted still work.
|
|
CREATE TABLE avro_alter_schema_add_new_column (old_col string) STORED AS AVRO;
|
|
|
|
# Set an inline Avro schema that lists new_col in addition to old_col.
ALTER TABLE avro_alter_schema_add_new_column SET TBLPROPERTIES (
|
|
'avro.schema.literal'=' {
|
|
"namespace": "org.apache.test",
|
|
"name": "avro_alter_schema_add_new_column",
|
|
"type": "record",
|
|
"fields": [
|
|
{ "name":"old_col", "type":"string" },
|
|
{ "name":"new_col", "type":"string" }
|
|
]
|
|
}'
|
|
);
|
|
|
|
# NOTE(review): presumably REFRESH is needed so that the new schema is picked up.
REFRESH avro_alter_schema_add_new_column;
|
|
====
|
|
---- QUERY
|
|
# The column added through the Avro schema has to show up in describe.
|
|
DESCRIBE avro_alter_schema_add_new_column;
|
|
---- TYPES
|
|
string,string,string
|
|
---- RESULTS
|
|
'old_col','string','from deserializer'
|
|
'new_col','string','from deserializer'
|
|
====
|
|
---- QUERY
|
|
# The added column has to show up in describe formatted too (checked as a subset).
|
|
DESCRIBE FORMATTED avro_alter_schema_add_new_column;
|
|
---- TYPES
|
|
string,string,string
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'old_col','STRING','from deserializer'
|
|
'new_col','STRING','from deserializer'
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3776: Create an Avro table, remove a column from the Avro schema and make sure
|
|
# describe and describe formatted still work.
|
|
CREATE TABLE avro_alter_schema_remove_column (col1 string, col2 string) STORED AS AVRO;
|
|
|
|
# Set an inline Avro schema that lists only col1, i.e. col2 is dropped from the schema.
ALTER TABLE avro_alter_schema_remove_column SET TBLPROPERTIES (
|
|
'avro.schema.literal'=' {
|
|
"namespace": "org.apache.test",
|
|
"name": "avro_alter_schema_remove_column",
|
|
"type": "record",
|
|
"fields": [
|
|
{ "name":"col1", "type":"string" }
|
|
]
|
|
}'
|
|
);
|
|
# NOTE(review): presumably REFRESH is needed so that the new schema is picked up.
REFRESH avro_alter_schema_remove_column;
|
|
====
|
|
---- QUERY
|
|
# The removed column (col2) must no longer show up in describe.
|
|
DESCRIBE avro_alter_schema_remove_column;
|
|
---- TYPES
|
|
string,string,string
|
|
---- RESULTS
|
|
'col1','string','from deserializer'
|
|
====
|
|
---- QUERY
|
|
# Describe formatted must still work; col1 is checked as a subset of its output.
DESCRIBE FORMATTED avro_alter_schema_remove_column;
|
|
---- TYPES
|
|
string,string,string
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'col1','STRING','from deserializer'
|
|
====
|