mirror of
https://github.com/apache/impala.git
synced 2025-12-30 12:02:10 -05:00
Bug: Commit 6f31c7 fixed a crash when setting Avro schemas for tables whose storage was altered to the Avro file format. However, the fix was incomplete for partitioned/multi-file-format tables, because 'hasAvroData_' is not set on all code paths that load partitioned tables (for example, HdfsTable#loadAllPartitions()).

Fix: Moved the code that sets 'hasAvroData_' to addPartition(), which is the common logic for all code paths that add new partitions. Also closed the test coverage gap by adding a new test for partitioned tables altered to the Avro format.

Change-Id: I7854ff002b2277ec4a5388216218a1d5ad142de8
Reviewed-on: http://gerrit.cloudera.org:8080/5388
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins
177 lines
4.9 KiB
Plaintext
177 lines
4.9 KiB
Plaintext
====
---- QUERY
# Stale-schema scenario for an unpartitioned table: create the table with the default
# file format and switch it to Avro with ALTER TABLE in a later step. The query issued
# right after the switch runs with stale metadata and is expected to hit the
# "Missing Avro schema" error. Invalidating metadata should cause the Avro schema to be
# properly set upon the next metadata load.
# NOTE(review): tinyint_col/smallint_col are declared INT and timestamp_col STRING,
# presumably to line up with the Avro schema file referenced below -- confirm.
CREATE EXTERNAL TABLE alltypesagg_staleschema (
  id INT,
  bool_col BOOLEAN,
  tinyint_col INT,
  smallint_col INT,
  int_col INT,
  bigint_col BIGINT,
  float_col FLOAT,
  double_col DOUBLE,
  date_string_col STRING,
  string_col STRING,
  timestamp_col STRING
)
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/alltypesaggmultifilesnopart_avro_snap'
TBLPROPERTIES ('avro.schema.url'= '$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypesaggmultifilesnopart.json')
====
---- QUERY
# Switch the unpartitioned table's file format to Avro through ALTER TABLE.
ALTER TABLE alltypesagg_staleschema SET FILEFORMAT AVRO
====
---- QUERY
# Runs with stale metadata right after the format change; the CATCH section lists the
# error message this query is expected to produce.
SELECT count(*) FROM alltypesagg_staleschema
---- CATCH
Missing Avro schema in scan node. This could be due to stale metadata.
====
---- QUERY
# Drop the stale metadata so that the next access reloads the table.
INVALIDATE METADATA alltypesagg_staleschema
====
---- QUERY
# After the metadata reload the same count query is expected to succeed.
SELECT count(*) FROM alltypesagg_staleschema
---- RESULTS
11000
---- TYPES
bigint
====
---- QUERY
# Same as above but for partitioned tables. The table itself is created without a
# LOCATION; the data location is supplied when the partition is added in a later step.
CREATE EXTERNAL TABLE alltypesagg_staleschema_part (
  id INT,
  bool_col BOOLEAN,
  tinyint_col INT,
  smallint_col INT,
  int_col INT,
  bigint_col BIGINT,
  float_col FLOAT,
  double_col DOUBLE,
  date_string_col STRING,
  string_col STRING,
  timestamp_col STRING
) PARTITIONED BY (part_col INT)
TBLPROPERTIES ('avro.schema.url'= '$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypesaggmultifilesnopart.json')
====
---- QUERY
# Add a single partition (part_col=1) with an explicit data location.
ALTER TABLE alltypesagg_staleschema_part ADD PARTITION (part_col=1) LOCATION '$FILESYSTEM_PREFIX/test-warehouse/alltypesaggmultifilesnopart_avro_snap'
====
---- QUERY
# Change the file format of that one partition (not the whole table) to Avro.
ALTER TABLE alltypesagg_staleschema_part PARTITION (part_col=1) SET FILEFORMAT AVRO
====
---- QUERY
# Issued right after the partition's format change; the CATCH section lists the error
# message this query is expected to produce.
SELECT count(*) FROM alltypesagg_staleschema_part
---- CATCH
Missing Avro schema in scan node. This could be due to stale metadata.
====
---- QUERY
# Drop the stale metadata so that the next access reloads the partitioned table.
INVALIDATE METADATA alltypesagg_staleschema_part
====
---- QUERY
# After the metadata reload the same count query is expected to succeed.
SELECT count(*) FROM alltypesagg_staleschema_part
---- RESULTS
11000
---- TYPES
bigint
====
---- QUERY
# IMPALA-3092. Create an Avro table from column definitions only (no avro.schema.url or
# avro.schema.literal is given) and add columns via ALTER TABLE. Querying the table
# should work.
CREATE EXTERNAL TABLE avro_alter_table_add_new_column (
  a STRING,
  b STRING)
STORED AS AVRO
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/tinytable_avro';

ALTER TABLE avro_alter_table_add_new_column ADD COLUMNS (
  bool_col BOOLEAN,
  int_col INT,
  bigint_col BIGINT,
  float_col FLOAT,
  double_col DOUBLE,
  timestamp_col TIMESTAMP,
  decimal_col DECIMAL(2,0),
  string_col STRING)
====
---- QUERY
# Every column just added should come back as NULL for all three rows. Positions 8
# (timestamp_col) and 10 (string_col) are typed string in TYPES and their expected
# value is written as the quoted 'NULL'.
SELECT * FROM avro_alter_table_add_new_column
---- RESULTS
'aaaaaaa','bbbbbbb',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL'
'ccccc','dddd',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL'
'eeeeeeee','f',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL'
---- TYPES
string, string, boolean, int, bigint, float, double, string, decimal, string
====
---- QUERY
# IMPALA-3776: Create an Avro table, add a column to the Avro schema and make sure
# describe and describe formatted still work. The table starts with the single column
# old_col; the avro.schema.literal set afterwards declares old_col plus new_col, and the
# table is then refreshed.
CREATE TABLE avro_alter_schema_add_new_column (old_col STRING) STORED AS AVRO;

ALTER TABLE avro_alter_schema_add_new_column SET TBLPROPERTIES (
'avro.schema.literal'=' {
"namespace": "org.apache.test",
"name": "avro_alter_schema_add_new_column",
"type": "record",
"fields": [
{ "name":"old_col", "type":"string" },
{ "name":"new_col", "type":"string" }
]
}'
);

REFRESH avro_alter_schema_add_new_column;
====
---- QUERY
# The new column now has to show up in describe, after the original column.
DESCRIBE avro_alter_schema_add_new_column;
---- TYPES
string,string,string
---- RESULTS
'old_col','string','from deserializer'
'new_col','string','from deserializer'
====
---- QUERY
# The new column now has to show up in describe formatted. Only the two column rows are
# listed here; the RESULTS section is marked VERIFY_IS_SUBSET.
DESCRIBE FORMATTED avro_alter_schema_add_new_column;
---- TYPES
string,string,string
---- RESULTS: VERIFY_IS_SUBSET
'old_col','string','from deserializer'
'new_col','string','from deserializer'
====
---- QUERY
# IMPALA-3776: Create an Avro table, remove a column from the Avro schema and make sure
# describe and describe formatted still work. The table starts with col1 and col2; the
# avro.schema.literal set afterwards declares only col1, and the table is then
# refreshed.
CREATE TABLE avro_alter_schema_remove_column (col1 STRING, col2 STRING) STORED AS AVRO;

ALTER TABLE avro_alter_schema_remove_column SET TBLPROPERTIES (
'avro.schema.literal'=' {
"namespace": "org.apache.test",
"name": "avro_alter_schema_remove_column",
"type": "record",
"fields": [
{ "name":"col1", "type":"string" }
]
}'
);

REFRESH avro_alter_schema_remove_column;
====
---- QUERY
# The removed column (col2) must no longer show up in describe; only col1 is expected.
DESCRIBE avro_alter_schema_remove_column;
---- TYPES
string,string,string
---- RESULTS
'col1','string','from deserializer'
====
---- QUERY
# Describe formatted must still work after the column removal and list col1.
# NOTE(review): the RESULTS section is VERIFY_IS_SUBSET and names only col1, so this
# presumably checks that the col1 row is present rather than that col2 is absent --
# confirm against the test framework.
DESCRIBE FORMATTED avro_alter_schema_remove_column;
---- TYPES
string,string,string
---- RESULTS: VERIFY_IS_SUBSET
'col1','string','from deserializer'
====