mirror of
https://github.com/apache/impala.git
synced 2026-02-02 06:00:36 -05:00
To comply with the Parquet spec, scale now defaults to 0 when it is not set.
Wrapped reading scale and precision in a function that reads
LogicalType.DecimalType when it is set and falls back to the older scale
and precision fields when it is not, for backward compatibility.
Regenerated the bad_parquet_decimals table with DecimalType filled in and
moved the missing-scale test out of it, as a missing scale no longer makes a table bad.
Added the no_scale.parquet table to test reading a table whose scale is not set.
Verified its schema with parquet-tools:
message schema {
optional fixed_len_byte_array(2) d1 (DECIMAL(4,0));
}
Change-Id: I003220b6e2ef39d25d1c33df62c8432803fdc6eb
Reviewed-on: http://gerrit.cloudera.org:8080/18224
Reviewed-by: Zoltan Borok-Nagy <boroknagyz@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
146 lines
4.0 KiB
Plaintext
146 lines
4.0 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Returns all results despite a discrepancy between the number of values
|
|
# scanned and the number of values stored in the file metadata.
|
|
# Set a single node and scanner thread to make this test deterministic.
|
|
set num_nodes=1;
|
|
set num_scanner_threads=1;
|
|
select id, cnt from bad_column_metadata t, (select count(*) cnt from t.int_array) v
|
|
---- TYPES
|
|
bigint,bigint
|
|
---- RESULTS
|
|
1,10
|
|
2,10
|
|
3,10
|
|
4,10
|
|
5,10
|
|
6,10
|
|
7,10
|
|
8,10
|
|
9,10
|
|
10,10
|
|
11,10
|
|
12,10
|
|
13,10
|
|
14,10
|
|
15,10
|
|
16,10
|
|
17,10
|
|
18,10
|
|
19,10
|
|
20,10
|
|
21,10
|
|
22,10
|
|
23,10
|
|
24,10
|
|
25,10
|
|
26,10
|
|
27,10
|
|
28,10
|
|
29,10
|
|
30,10
|
|
---- ERRORS
|
|
Column metadata states there are 50 values, but read 100 values from column element. file=__HDFS_FILENAME__ (1 of 2 similar)
|
|
====
|
|
---- QUERY
|
|
# Same as above but only selecting a single scalar column.
|
|
set num_nodes=1;
|
|
set num_scanner_threads=1;
|
|
select id from bad_column_metadata
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
1
|
|
2
|
|
3
|
|
4
|
|
5
|
|
6
|
|
7
|
|
8
|
|
9
|
|
10
|
|
11
|
|
12
|
|
13
|
|
14
|
|
15
|
|
16
|
|
17
|
|
18
|
|
19
|
|
20
|
|
21
|
|
22
|
|
23
|
|
24
|
|
25
|
|
26
|
|
27
|
|
28
|
|
29
|
|
30
|
|
---- ERRORS
|
|
Column metadata states there are 11 values, but read 10 values from column id. file=__HDFS_FILENAME__
|
|
====
|
|
---- QUERY
|
|
SELECT * from bad_parquet_strings_negative_len
|
|
---- TYPES
|
|
STRING
|
|
---- RESULTS
|
|
---- ERRORS
|
|
File '$NAMENODE/test-warehouse/bad_parquet_strings_negative_len_parquet/plain-encoded-negative-len.parq' is corrupt: error decoding value of type STRING at offset 58
|
|
File '$NAMENODE/test-warehouse/bad_parquet_strings_negative_len_parquet/dict-encoded-negative-len.parq' is corrupt: error reading dictionary for data of type STRING: could not decode dictionary
|
|
====
|
|
---- QUERY
|
|
SELECT * from bad_parquet_strings_out_of_bounds
|
|
---- TYPES
|
|
STRING
|
|
---- RESULTS
|
|
---- ERRORS
|
|
File '$NAMENODE/test-warehouse/bad_parquet_strings_out_of_bounds_parquet/plain-encoded-out-of-bounds.parq' is corrupt: error decoding value of type STRING at offset 58
|
|
File '$NAMENODE/test-warehouse/bad_parquet_strings_out_of_bounds_parquet/dict-encoded-out-of-bounds.parq' is corrupt: error reading dictionary for data of type STRING: could not decode dictionary
|
|
====
|
|
---- QUERY
|
|
# IMPALA-10808, IMPALA-10814: Check illegal decimal file schemas: d1 has no decimal precision set.
|
|
select d1 from bad_parquet_decimals
|
|
---- CATCH
|
|
File '$NAMENODE/test-warehouse/bad_parquet_decimals_parquet/illegal_decimals.parq' column 'd1' does not have the decimal precision set.
|
|
====
|
|
---- QUERY
|
|
# IMPALA-10808, IMPALA-10814: Check illegal decimal file schemas: d2's file precision (20) does not match the table precision (4).
|
|
select d2 from bad_parquet_decimals
|
|
---- CATCH
|
|
File '$NAMENODE/test-warehouse/bad_parquet_decimals_parquet/illegal_decimals.parq' column 'd2' has a precision that does not match the table metadata precision. File metadata precision: 20, table metadata precision: 4.
|
|
====
|
|
---- QUERY
|
|
# IMPALA-10808, IMPALA-10814: Check illegal decimal file schemas: d3's file precision (-1) does not match the table precision (4).
|
|
select d3 from functional_parquet.bad_parquet_decimals;
|
|
---- CATCH
|
|
File '$NAMENODE/test-warehouse/bad_parquet_decimals_parquet/illegal_decimals.parq' column 'd3' has a precision that does not match the table metadata precision. File metadata precision: -1, table metadata precision: 4.
|
|
====
|
|
---- QUERY
|
|
# IMPALA-10808, IMPALA-10814: Check illegal decimal file schemas: d4 does not have type_length set.
|
|
select d4 from functional_parquet.bad_parquet_decimals
|
|
---- CATCH
|
|
File '$NAMENODE/test-warehouse/bad_parquet_decimals_parquet/illegal_decimals.parq' column 'd4' does not have type_length set.
|
|
====
|
|
---- QUERY
|
|
# IMPALA-10808, IMPALA-10814: Check illegal decimal file schemas: d5 has an invalid type length of 0.
|
|
select d5 from functional_parquet.bad_parquet_decimals
|
|
---- CATCH
|
|
File '$NAMENODE/test-warehouse/bad_parquet_decimals_parquet/illegal_decimals.parq' column 'd5' has invalid type length: 0
|
|
====
|
|
---- QUERY
|
|
# IMPALA-10808, IMPALA-10814: Check illegal decimal file schemas: d6 has a negative scale (-1) with precision 4.
|
|
select d6 from functional_parquet.bad_parquet_decimals
|
|
---- CATCH
|
|
File '$NAMENODE/test-warehouse/bad_parquet_decimals_parquet/illegal_decimals.parq' column 'd6' has invalid scale: -1. Precision is 4.
|
|
====
|
|
---- QUERY
|
|
# IMPALA-10808, IMPALA-10814: Check illegal decimal file schemas: d7's scale (4) is larger than its precision (2).
|
|
select d7 from functional_parquet.bad_parquet_decimals;
|
|
---- CATCH
|
|
File '$NAMENODE/test-warehouse/bad_parquet_decimals_parquet/illegal_decimals.parq' column 'd7' has invalid scale: 4. Precision is 2.
|
|
====
|