Files
impala/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
Attila Jeges 27fa27e808 IMPALA-8198: DATE: Read from avro.
This change is a follow-up to IMPALA-7368 and adds support for DATE
type to the avro scanner.

Similarly to Parquet, Avro uses the DATE logical type for dates. The DATE
logical type annotates an INT32 that stores the number of days since
the Unix epoch, 1 January 1970.

This representation introduces an avro interoperability issue between
Impala and older versions of Hive:
- Before version 3.1, Hive used the Julian calendar to represent dates
  up to 1582-10-05 and the Gregorian calendar for dates starting with
  1582-10-15. Dates between 1582-10-05 and 1582-10-15 were lost.
- Impala uses the proleptic Gregorian calendar, extending the Gregorian
  calendar backward to dates preceding its official introduction on
  1582-10-15.
This means that pre-1582-10-15 dates written to an avro table by Hive
will be read back incorrectly by Impala.

Note that Hive 3.1 switched to the proleptic Gregorian calendar too, so
for Hive 3.1+ this is no longer an issue.

Dependency changes:
- BE uses avro 1.7.4-p5 from native-toolchain.

Change-Id: I7a9d5b93a22cf3a00244037e187f8c145cacc959
Reviewed-on: http://gerrit.cloudera.org:8080/13944
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2019-09-27 17:18:35 +00:00

178 lines
5.0 KiB
Plaintext

====
---- QUERY
# Stale-schema scenario, unpartitioned table, step 1.
# The table is declared without a STORED AS clause, so it starts out with the default
# file format. It is switched to Avro afterwards via ALTER TABLE. While the metadata is
# still stale the scan is expected to fail with a missing-Avro-schema error, and after
# the metadata is invalidated the Avro schema should be set up on the next load.
CREATE EXTERNAL TABLE alltypesagg_staleschema (
  id              INT,
  bool_col        BOOLEAN,
  tinyint_col     INT,
  smallint_col    INT,
  int_col         INT,
  bigint_col      BIGINT,
  float_col       FLOAT,
  double_col      DOUBLE,
  date_string_col STRING,
  string_col      STRING,
  timestamp_col   STRING
)
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/alltypesaggmultifilesnopart_avro_snap'
TBLPROPERTIES (
  'avro.schema.url' = '$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypesaggmultifilesnopart.json'
)
====
---- QUERY
# Switch the table from the default file format to Avro.
ALTER TABLE alltypesagg_staleschema
SET FILEFORMAT AVRO
====
---- QUERY
# Scanning right after the format change is expected to fail with the error below.
SELECT COUNT(*)
FROM alltypesagg_staleschema
---- CATCH
Missing Avro schema in scan node. This could be due to stale metadata.
====
---- QUERY
# Drop the stale metadata so that the next access reloads the table.
INVALIDATE METADATA alltypesagg_staleschema
====
---- QUERY
# After the metadata reload the same count is expected to succeed.
SELECT COUNT(*)
FROM alltypesagg_staleschema
---- RESULTS
11000
---- TYPES
bigint
====
---- QUERY
# Partitioned variant of the stale-schema scenario, step 1.
# The table again has no STORED AS clause. Its single partition is attached and
# switched to Avro by the ALTER TABLE statements that follow.
CREATE EXTERNAL TABLE alltypesagg_staleschema_part (
  id              INT,
  bool_col        BOOLEAN,
  tinyint_col     INT,
  smallint_col    INT,
  int_col         INT,
  bigint_col      BIGINT,
  float_col       FLOAT,
  double_col      DOUBLE,
  date_string_col STRING,
  string_col      STRING,
  timestamp_col   STRING
)
PARTITIONED BY (part_col INT)
TBLPROPERTIES (
  'avro.schema.url' = '$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypesaggmultifilesnopart.json'
)
====
---- QUERY
# Attach the existing data directory as partition part_col=1.
ALTER TABLE alltypesagg_staleschema_part
ADD PARTITION (part_col = 1)
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/alltypesaggmultifilesnopart_avro_snap'
====
---- QUERY
# Switch only partition part_col=1 to the Avro file format.
ALTER TABLE alltypesagg_staleschema_part
PARTITION (part_col = 1)
SET FILEFORMAT AVRO
====
---- QUERY
# Scanning right after the partition format change is expected to fail with the error below.
SELECT COUNT(*)
FROM alltypesagg_staleschema_part
---- CATCH
Missing Avro schema in scan node. This could be due to stale metadata.
====
---- QUERY
# Drop the stale metadata so that the next access reloads the partitioned table.
INVALIDATE METADATA alltypesagg_staleschema_part
====
---- QUERY
# After the metadata reload the same count is expected to succeed.
SELECT COUNT(*)
FROM alltypesagg_staleschema_part
---- RESULTS
11000
---- TYPES
bigint
====
---- QUERY
# IMPALA-3092: Create an Avro table that declares only the string columns a and b, then
# append further columns of several types via ALTER TABLE ADD COLUMNS. Querying the
# table should work afterwards.
CREATE EXTERNAL TABLE avro_alter_table_add_new_column (
  a STRING,
  b STRING
)
STORED AS AVRO
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/tinytable_avro';
ALTER TABLE avro_alter_table_add_new_column ADD COLUMNS (
  bool_col      BOOLEAN,
  int_col       INT,
  bigint_col    BIGINT,
  float_col     FLOAT,
  double_col    DOUBLE,
  timestamp_col TIMESTAMP,
  decimal_col   DECIMAL(2,0),
  string_col    STRING,
  date_col      DATE
)
====
---- QUERY
# Every column added by ALTER TABLE is expected to read back as NULL. Columns whose
# result type is string show up as the quoted 'NULL' in the expected rows.
SELECT *
FROM avro_alter_table_add_new_column
---- RESULTS
'aaaaaaa','bbbbbbb',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL',NULL
'ccccc','dddd',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL',NULL
'eeeeeeee','f',NULL,NULL,NULL,NULL,NULL,'NULL',NULL,'NULL',NULL
---- TYPES
string, string, boolean, int, bigint, float, double, string, decimal, string, date
====
---- QUERY
# IMPALA-3776: Create an Avro table with a single column, then replace its Avro schema
# with a literal that carries an extra field (new_col). DESCRIBE and DESCRIBE FORMATTED
# must still work after the REFRESH.
CREATE TABLE avro_alter_schema_add_new_column (
  old_col STRING
)
STORED AS AVRO;
ALTER TABLE avro_alter_schema_add_new_column
SET TBLPROPERTIES (
'avro.schema.literal'=' {
"namespace": "org.apache.test",
"name": "avro_alter_schema_add_new_column",
"type": "record",
"fields": [
{ "name":"old_col", "type":"string" },
{ "name":"new_col", "type":"string" }
]
}'
);
REFRESH avro_alter_schema_add_new_column;
====
---- QUERY
# new_col exists only in the schema literal, yet it must be listed next to old_col.
DESCRIBE avro_alter_schema_add_new_column;
---- RESULTS
'old_col','string','from deserializer'
'new_col','string','from deserializer'
---- TYPES
string,string,string
====
---- QUERY
# DESCRIBE FORMATTED must list new_col as well. Only a subset of its output is checked.
DESCRIBE FORMATTED avro_alter_schema_add_new_column;
---- RESULTS: VERIFY_IS_SUBSET
'old_col','string','from deserializer'
'new_col','string','from deserializer'
---- TYPES
string,string,string
====
---- QUERY
# IMPALA-3776: Create an Avro table with two columns, then replace its Avro schema with
# a literal that keeps only col1. DESCRIBE and DESCRIBE FORMATTED must still work after
# the REFRESH.
CREATE TABLE avro_alter_schema_remove_column (
  col1 STRING,
  col2 STRING
)
STORED AS AVRO;
ALTER TABLE avro_alter_schema_remove_column
SET TBLPROPERTIES (
'avro.schema.literal'=' {
"namespace": "org.apache.test",
"name": "avro_alter_schema_remove_column",
"type": "record",
"fields": [
{ "name":"col1", "type":"string" }
]
}'
);
REFRESH avro_alter_schema_remove_column;
====
---- QUERY
# The removed column (col2) must no longer show up in describe. Only col1 remains.
DESCRIBE avro_alter_schema_remove_column;
---- RESULTS
'col1','string','from deserializer'
---- TYPES
string,string,string
====
---- QUERY
# DESCRIBE FORMATTED must still work after the column removal. Only a subset of its
# output (the col1 row) is checked.
DESCRIBE FORMATTED avro_alter_schema_remove_column;
---- RESULTS: VERIFY_IS_SUBSET
'col1','string','from deserializer'
---- TYPES
string,string,string
====