mirror of
https://github.com/apache/impala.git
synced 2026-01-22 09:01:58 -05:00
This change is a follow-up to IMPALA-7368 and adds support for DATE type to the avro scanner. Similarly to parquet, avro uses DATE logical type for dates. DATE logical type annotates an INT32 that stores the number of days since the unix epoch, 1 January 1970. This representation introduces an avro interoperability issue between Impala and older versions of Hive: - Before version 3.1, Hive used Julian calendar to represent dates up to 1582-10-05 and Gregorian calendar for dates starting with 1582-10-15. Dates between 1582-10-05 and 1582-10-15 were lost. - Impala uses proleptic Gregorian calendar, extending the Gregorian calendar backward to dates preceding its official introduction in 1582-10-15. This means that pre-1582-10-15 dates written to an avro table by Hive will be read back incorrectly by Impala. Note that Hive 3.1 switched to proleptic Gregorian calendar too, so for Hive 3.1+ this is no longer an issue. Dependency changes: - BE uses avro 1.7.4-p5 from native-toolchain. Change-Id: I7a9d5b93a22cf3a00244037e187f8c145cacc959 Reviewed-on: http://gerrit.cloudera.org:8080/13944 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
181 lines
7.5 KiB
Plaintext
181 lines
7.5 KiB
Plaintext
====
|
|
---- QUERY
|
|
# see testdata/avro_schema_resolution
|
|
select * from functional_avro_snap.schema_resolution_test
|
|
---- TYPES
|
|
boolean, int, long, float, double, date, string, string, string
|
|
---- RESULTS
|
|
true,1,1,1,1,1970-01-02,'default string','','NULL'
|
|
false,2,2,2,2,1970-01-03,'serialized string','','NULL'
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1136: Tests that Impala can read Hive-created Avro tables that have
|
|
# no specified Avro schema, i.e., the Avro schema is inferred from the column
|
|
# definitions.
|
|
# IMPALA-1947: A TIMESTAMP from the column definitions results in a STRING column
|
|
# backed by a stored Avro STRING during table loading.
|
|
# See testdata/avro_schema_resolution
|
|
select * from functional_avro_snap.no_avro_schema where year = 2009 order by id limit 1
|
|
union all
|
|
select * from functional_avro_snap.no_avro_schema where year = 2010 order by id limit 1
|
|
---- TYPES
|
|
int, boolean, int, int, int, bigint, float, double, string, string, string, int, int
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
2430,true,0,0,0,0,0,0,'09/01/09','0','2009-09-01 00:00:00',2009,9
|
|
6380,true,0,0,0,0,0,0,'10/01/10','0','2010-10-01 00:00:00',2010,10
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2798: Test mismatched column definitions and Avro schema.
|
|
select count(*) from functional_avro_snap.avro_coldef
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
10
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2798: Test mismatched column definitions and Avro schema.
|
|
select count(*) from functional_avro_snap.avro_extra_coldef
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
10
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2798: Test mismatched column definitions and Avro schema.
|
|
select tinyint_col, string_col from functional_avro_snap.avro_coldef order by int_col
|
|
---- TYPES
|
|
int, string
|
|
---- RESULTS
|
|
0,'0'
|
|
0,'0'
|
|
1,'1'
|
|
1,'1'
|
|
2,'2'
|
|
2,'2'
|
|
3,'3'
|
|
3,'3'
|
|
4,'4'
|
|
4,'4'
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2798: Test mismatched column definitions and Avro schema.
|
|
select tinyint_col, string_col from functional_avro_snap.avro_extra_coldef order by int_col
|
|
---- TYPES
|
|
int, string
|
|
---- RESULTS
|
|
0,'0'
|
|
0,'0'
|
|
1,'1'
|
|
1,'1'
|
|
2,'2'
|
|
2,'2'
|
|
3,'3'
|
|
3,'3'
|
|
4,'4'
|
|
4,'4'
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3687: Create a table with mismatched column definitions and Avro schema.
|
|
CREATE EXTERNAL TABLE alltypesagg_mismatch_column_name_comment (
|
|
id INT COMMENT 'int commnet',
|
|
bool_col BOOLEAN COMMENT 'bool commnet',
|
|
tinyint_col float COMMENT 'tinyint comment',
|
|
smallint_col double COMMENT 'smallint comment',
|
|
int_col INT COMMENT 'int comment',
|
|
bigint_col BIGINT COMMENT 'bigint comment',
|
|
float_col FLOAT COMMENT 'float comment',
|
|
double_col DOUBLE COMMENT 'double comment',
|
|
date_string_col STRING COMMENT 'string comment',
|
|
char_col char(2) COMMENT 'char comment',
|
|
varchar_col varchar(5) COMMENT 'varchar comment'
|
|
)
|
|
STORED AS AVRO
|
|
TBLPROPERTIES ('avro.schema.literal'= '{
|
|
"fields": [
|
|
{"type": ["int", "null"], "name": "id_mismatch", "doc": "int comment mismatch"},
|
|
{"type": ["boolean", "null"], "name": "bool_col_mismatch", "doc": "bool comment mismatch"},
|
|
{"type": ["int", "null"], "name": "tinyint_col_mismatch", "doc": "tinyint comment mismatch"},
|
|
{"type": ["int", "null"], "name": "smallint_col_mismatch", "doc": "smallint comment mismatch"},
|
|
{"type": ["int", "null"], "name": "int_col_mismatch", "doc": "int comment mismatch"},
|
|
{"type": ["long", "null"], "name": "bigint_col_mismatch", "doc": "bigint comment mismatch"},
|
|
{"type": ["float", "null"], "name": "float_col_mismatch", "doc": "float comment mismatch"},
|
|
{"type": ["double", "null"], "name": "double_col_mismatch", "doc": "double comment mismatch"},
|
|
{"type": ["string", "null"], "name": "date_string_col_mismatch", "doc": "string comment mismatch"},
|
|
{"type": ["string", "null"], "name": "char_col_mismatch", "doc": "char comment mismatch"},
|
|
{"type": ["string", "null"], "name": "varchar_col_mismatch", "doc": "varchar comment mismatch"}],
|
|
"type": "record", "name": "a"}')
|
|
====
|
|
---- QUERY
|
|
# We favor Avro schema definition over column definitions except for CHAR/VARCHAR, for
|
|
# which we use the type from the column definition.
|
|
DESCRIBE alltypesagg_mismatch_column_name_comment
|
|
---- RESULTS
|
|
'id_mismatch','int','int comment mismatch'
|
|
'bool_col_mismatch','boolean','bool comment mismatch'
|
|
'tinyint_col_mismatch','int','tinyint comment mismatch'
|
|
'smallint_col_mismatch','int','smallint comment mismatch'
|
|
'int_col_mismatch','int','int comment mismatch'
|
|
'bigint_col_mismatch','bigint','bigint comment mismatch'
|
|
'float_col_mismatch','float','float comment mismatch'
|
|
'double_col_mismatch','double','double comment mismatch'
|
|
'date_string_col_mismatch','string','string comment mismatch'
|
|
'char_col_mismatch','char(2)','char comment mismatch'
|
|
'varchar_col_mismatch','varchar(5)','varchar comment mismatch'
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3687: Create a table with only Avro schema and later alter it to be a mismatched
|
|
# schema.
|
|
CREATE EXTERNAL TABLE alltypesagg_alter_avro_name_comment
|
|
STORED AS AVRO
|
|
TBLPROPERTIES ('avro.schema.literal'= '{
|
|
"fields": [
|
|
{"type": ["int", "null"], "name": "id", "doc": "int comment"},
|
|
{"type": ["boolean", "null"], "name": "bool_col", "doc": "bool comment"},
|
|
{"type": ["int", "null"], "name": "tinyint_col", "doc": "tinyint comment"},
|
|
{"type": ["int", "null"], "name": "smallint_col", "doc": "smallint comment"},
|
|
{"type": ["int", "null"], "name": "int_col", "doc": "int comment"},
|
|
{"type": ["long", "null"], "name": "bigint_col", "doc": "bigint comment"},
|
|
{"type": ["float", "null"], "name": "float_col", "doc": "float comment"},
|
|
{"type": ["double", "null"], "name": "double_col", "doc": "double comment"},
|
|
{"type": ["string", "null"], "name": "date_string_col", "doc": "string comment"},
|
|
{"type": ["string", "null"], "name": "char_col", "doc": "char comment"},
|
|
{"type": ["string", "null"], "name": "varchar_col", "doc": "varchar comment"}],
|
|
"type": "record", "name": "a"}');
|
|
|
|
ALTER TABLE alltypesagg_alter_avro_name_comment SET TBLPROPERTIES ('avro.schema.literal'= '{
|
|
"fields": [
|
|
{"type": ["int", "null"], "name": "id_mismatch", "doc": "int comment mismatch"},
|
|
{"type": ["boolean", "null"], "name": "bool_col_mismatch", "doc": "bool comment mismatch"},
|
|
{"type": ["int", "null"], "name": "tinyint_col_mismatch", "doc": "tinyint comment mismatch"},
|
|
{"type": ["int", "null"], "name": "smallint_col_mismatch", "doc": "smallint comment mismatch"},
|
|
{"type": ["int", "null"], "name": "int_col_mismatch", "doc": "int comment mismatch"},
|
|
{"type": ["long", "null"], "name": "bigint_col_mismatch", "doc": "bigint comment mismatch"},
|
|
{"type": ["float", "null"], "name": "float_col_mismatch", "doc": "float comment mismatch"},
|
|
{"type": ["double", "null"], "name": "double_col_mismatch", "doc": "double comment mismatch"},
|
|
{"type": ["string", "null"], "name": "date_string_col_mismatch", "doc": "string comment mismatch"},
|
|
{"type": ["string", "null"], "name": "char_col_mismatch", "doc": "char comment mismatch"},
|
|
{"type": ["string", "null"], "name": "varchar_col_mismatch", "doc": "varchar comment mismatch"}],
|
|
"type": "record", "name": "a"}');
|
|
|
|
REFRESH alltypesagg_alter_avro_name_comment;
|
|
====
|
|
---- QUERY
|
|
DESCRIBE alltypesagg_alter_avro_name_comment
|
|
---- RESULTS
|
|
'id_mismatch','int','int comment mismatch'
|
|
'bool_col_mismatch','boolean','bool comment mismatch'
|
|
'tinyint_col_mismatch','int','tinyint comment mismatch'
|
|
'smallint_col_mismatch','int','smallint comment mismatch'
|
|
'int_col_mismatch','int','int comment mismatch'
|
|
'bigint_col_mismatch','bigint','bigint comment mismatch'
|
|
'float_col_mismatch','float','float comment mismatch'
|
|
'double_col_mismatch','double','double comment mismatch'
|
|
'date_string_col_mismatch','string','string comment mismatch'
|
|
'char_col_mismatch','string','char comment mismatch'
|
|
'varchar_col_mismatch','string','varchar comment mismatch'
|
|
---- TYPES
|
|
string, string, string
|
|
====
|