mirror of
https://github.com/apache/impala.git
synced 2026-01-21 06:02:50 -05:00
This change is a follow-up to IMPALA-7368 and adds support for DATE type to the avro scanner.

Similarly to parquet, avro uses DATE logical type for dates. DATE logical type annotates an INT32 that stores the number of days since the unix epoch, 1 January 1970.

This representation introduces an avro interoperability issue between Impala and older versions of Hive:
- Before version 3.1, Hive used Julian calendar to represent dates up to 1582-10-05 and Gregorian calendar for dates starting with 1582-10-15. Dates between 1582-10-05 and 1582-10-15 were lost.
- Impala uses proleptic Gregorian calendar, extending the Gregorian calendar backward to dates preceding its official introduction in 1582-10-15.

This means that pre-1582-10-15 dates written to an avro table by Hive will be read back incorrectly by Impala. Note that Hive 3.1 switched to proleptic Gregorian calendar too, so for Hive 3.1+ this is no longer an issue.

Dependency changes:
- BE uses avro 1.7.4-p5 from native-toolchain.

Change-Id: I7a9d5b93a22cf3a00244037e187f8c145cacc959
Reviewed-on: http://gerrit.cloudera.org:8080/13944
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
178 lines
5.0 KiB
Plaintext
178 lines
5.0 KiB
Plaintext
====
|
|
---- QUERY
# Create a table with the default file format and later switch it to Avro via
# ALTER TABLE. While the metadata is still stale, querying the table is expected
# to fail with a "Missing Avro schema" error (the test case that follows the
# ALTER uses ---- CATCH, i.e. an error, not a warning).
# Invalidating metadata should cause the Avro schema to be properly set upon the
# next metadata load.
CREATE EXTERNAL TABLE alltypesagg_staleschema (
  id INT,
  bool_col BOOLEAN,
  tinyint_col INT,
  smallint_col INT,
  int_col INT,
  bigint_col BIGINT,
  float_col FLOAT,
  double_col DOUBLE,
  date_string_col STRING,
  string_col STRING,
  timestamp_col STRING
)
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/alltypesaggmultifilesnopart_avro_snap'
TBLPROPERTIES ('avro.schema.url'= '$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypesaggmultifilesnopart.json')
|
|
====
|
|
---- QUERY
# Switch the table to Avro. No metadata invalidation is issued between this
# statement and the next query on the table.
alter table alltypesagg_staleschema set fileformat avro
|
|
====
|
|
---- QUERY
# Runs before any "invalidate metadata" on the table; the query is expected to
# fail with the error text listed under CATCH.
select count(*) from alltypesagg_staleschema
---- CATCH
Missing Avro schema in scan node. This could be due to stale metadata.
|
|
====
|
|
---- QUERY
# Invalidate the table's metadata so the Avro schema is set on the next
# metadata load.
invalidate metadata alltypesagg_staleschema
|
|
====
|
|
---- QUERY
# Same count query as before the invalidation; it is now expected to succeed.
select count(*) from alltypesagg_staleschema
---- RESULTS
11000
---- TYPES
bigint
|
|
====
|
|
---- QUERY
# Same stale-schema scenario as for alltypesagg_staleschema, but for a
# partitioned table. The table is created with the default file format and
# without a LOCATION; the partition is added by a separate ALTER TABLE.
CREATE EXTERNAL TABLE alltypesagg_staleschema_part (
  id INT,
  bool_col BOOLEAN,
  tinyint_col INT,
  smallint_col INT,
  int_col INT,
  bigint_col BIGINT,
  float_col FLOAT,
  double_col DOUBLE,
  date_string_col STRING,
  string_col STRING,
  timestamp_col STRING
) partitioned by (part_col int)
TBLPROPERTIES ('avro.schema.url'= '$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypesaggmultifilesnopart.json')
|
|
====
|
|
---- QUERY
# Add a partition that points at the same location used by the unpartitioned
# alltypesagg_staleschema table.
alter table alltypesagg_staleschema_part add partition (part_col=1) location '$FILESYSTEM_PREFIX/test-warehouse/alltypesaggmultifilesnopart_avro_snap'
|
|
====
|
|
---- QUERY
# Switch only the partition to Avro. No metadata invalidation is issued between
# this statement and the next query on the table.
alter table alltypesagg_staleschema_part partition (part_col=1) set fileformat avro
|
|
====
|
|
---- QUERY
# Runs before any "invalidate metadata" on the partitioned table; the query is
# expected to fail with the error text listed under CATCH.
select count(*) from alltypesagg_staleschema_part
---- CATCH
Missing Avro schema in scan node. This could be due to stale metadata.
|
|
====
|
|
---- QUERY
# Invalidate the partitioned table's metadata so the Avro schema is set on the
# next metadata load.
invalidate metadata alltypesagg_staleschema_part
|
|
====
|
|
---- QUERY
# Same count query as before the invalidation; it is now expected to succeed.
select count(*) from alltypesagg_staleschema_part
---- RESULTS
11000
---- TYPES
bigint
|
|
====
|
|
---- QUERY
# IMPALA-3092. Create an Avro table from column definitions only (no
# avro.schema.url or avro.schema.literal is given) and add columns via ALTER
# TABLE. Querying the table should work.
CREATE EXTERNAL TABLE avro_alter_table_add_new_column (
  a string,
  b string)
STORED AS AVRO
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/tinytable_avro';

ALTER TABLE avro_alter_table_add_new_column ADD COLUMNS (
  bool_col boolean,
  int_col int,
  bigint_col bigint,
  float_col float,
  double_col double,
  timestamp_col timestamp,
  decimal_col decimal(2,0),
  string_col string,
  date_col date)
|
|
====
|
|
---- QUERY
# Every column just added should read back as NULL for the existing rows.
# Columns whose reported type is string show NULL in quotes ('NULL'). Per the
# TYPES line, timestamp_col (declared as timestamp) is reported as string.
select * from avro_alter_table_add_new_column
---- RESULTS
'aaaaaaa','bbbbbbb',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL',NULL
'ccccc','dddd',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL',NULL
'eeeeeeee','f',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL',NULL
---- TYPES
string, string, boolean, int, bigint, float, double, string, decimal, string, date
|
|
====
|
|
---- QUERY
# IMPALA-3776: Create an Avro table, add a column to the Avro schema and make sure
# describe and describe formatted still work.
# The schema is changed by setting 'avro.schema.literal' to a record that lists
# old_col plus new_col; REFRESH is issued afterwards, before the describe checks.
# The lines of the schema literal are left unindented so that the string value
# itself is not altered.
CREATE TABLE avro_alter_schema_add_new_column (old_col string) STORED AS AVRO;

ALTER TABLE avro_alter_schema_add_new_column SET TBLPROPERTIES (
  'avro.schema.literal'=' {
"namespace": "org.apache.test",
"name": "avro_alter_schema_add_new_column",
"type": "record",
"fields": [
{ "name":"old_col", "type":"string" },
{ "name":"new_col", "type":"string" }
]
}'
);

REFRESH avro_alter_schema_add_new_column;
|
|
====
|
|
---- QUERY
# The new column (new_col) now has to show up in describe, after old_col.
DESCRIBE avro_alter_schema_add_new_column;
---- TYPES
string,string,string
---- RESULTS
'old_col','string','from deserializer'
'new_col','string','from deserializer'
|
|
====
|
|
---- QUERY
# The new column now has to show up in describe formatted.
# NOTE(review): VERIFY_IS_SUBSET presumably only requires the rows listed under
# RESULTS to appear somewhere in the output - confirm against the test framework.
DESCRIBE FORMATTED avro_alter_schema_add_new_column;
---- TYPES
string,string,string
---- RESULTS: VERIFY_IS_SUBSET
'old_col','string','from deserializer'
'new_col','string','from deserializer'
|
|
====
|
|
---- QUERY
# IMPALA-3776: Create an Avro table, remove a column from the Avro schema and make sure
# describe and describe formatted still work.
# The table starts with col1 and col2; the 'avro.schema.literal' set here lists
# only col1. REFRESH is issued afterwards, before the describe checks.
# The lines of the schema literal are left unindented so that the string value
# itself is not altered.
CREATE TABLE avro_alter_schema_remove_column (col1 string, col2 string) STORED AS AVRO;

ALTER TABLE avro_alter_schema_remove_column SET TBLPROPERTIES (
  'avro.schema.literal'=' {
"namespace": "org.apache.test",
"name": "avro_alter_schema_remove_column",
"type": "record",
"fields": [
{ "name":"col1", "type":"string" }
]
}'
);

REFRESH avro_alter_schema_remove_column;
|
|
====
|
|
---- QUERY
# The removed column (col2) must no longer show up in describe; only col1 is
# expected.
DESCRIBE avro_alter_schema_remove_column;
---- TYPES
string,string,string
---- RESULTS
'col1','string','from deserializer'
|
|
====
|
|
---- QUERY
# col1 still has to show up in describe formatted after col2 was removed from
# the Avro schema.
# NOTE(review): with VERIFY_IS_SUBSET this check presumably does not assert that
# col2 is absent from the output - confirm against the test framework.
DESCRIBE FORMATTED avro_alter_schema_remove_column;
---- TYPES
string,string,string
---- RESULTS: VERIFY_IS_SUBSET
'col1','string','from deserializer'
|
|
====
|