Files
impala/testdata/workloads/functional-query/queries/QueryTest/parquet.test
Skye Wanderman-Milne 68fef6a5bf IMPALA-2213: make Parquet scanner fail query if the file size metadata is stale
This patch changes the Parquet scanner to fail the query if it can't read
the full footer scan range, which indicates that the file has been
overwritten by a shorter file without refreshing the table metadata.
Previously it would hit a DCHECK. This patch adds a test for this case,
as well as for the case where the new file is longer than the metadata
states (which fails with an existing error).

Change-Id: Ie2031ac2dc90e4f2573bd3ca8a3709db60424f07
Reviewed-on: http://gerrit.cloudera.org:8080/1084
Tested-by: Internal Jenkins
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
2015-10-01 13:58:39 -07:00

55 lines
1.4 KiB
Plaintext

====
---- QUERY
# Regression coverage for two issues against the same table:
#   IMPALA-694: data file produced by parquet-mr version 1.2.5-cdh4.5.0
#   IMPALA-720: data file with multiple row groups
# Filters bad_parquet on a single string value; four matching rows are expected.
SELECT *
FROM bad_parquet
WHERE field = "parquet"
---- TYPES
string
---- RESULTS
'parquet'
'parquet'
'parquet'
'parquet'
====
---- QUERY
# Counts the distinct values of 'field' in bad_parquet; 1005 are expected.
SELECT COUNT(DISTINCT field)
FROM bad_parquet
---- TYPES
bigint
---- RESULTS
1005
====
---- QUERY
# The footer of this Parquet file carries an invalid metadata size, so the
# scan is expected to fail with the error below rather than return rows.
SELECT *
FROM bad_metadata_len
---- CATCH
Invalid metadata size in file footer
====
---- QUERY
# This Parquet file has an invalid column dict_page_offset. The expected
# error reports the offending offset, the column size and the file size.
SELECT *
FROM bad_dict_page_offset
---- CATCH
Column 0 has invalid column offsets (offset=10000, size=47, file_size=249)
====
---- QUERY
# This Parquet file has an invalid column total_compressed_size. The expected
# error reports a column size (1000000) far larger than the file size (245).
SELECT *
FROM bad_compressed_size
---- CATCH
Column 0 has invalid column offsets (offset=4, size=1000000, file_size=245)
====
---- QUERY
# Reads a Parquet file that contains required fields.
# Three rows across ten columns are expected.
SELECT *
FROM kite_required_fields
---- TYPES
bigint,bigint,string,string,boolean,boolean,bigint,bigint,bigint,bigint
---- RESULTS
1,2,'foo','bar',true,false,1,2,3,4
1,NULL,'foo','NULL',true,NULL,NULL,NULL,3,4
100,NULL,'foooo','NULL',false,NULL,NULL,NULL,300,400
====
---- QUERY
# This Parquet file has an invalid magic number. The expected error names
# the offending file and the version string that was read ('XXXX').
SELECT *
FROM bad_magic_number
---- CATCH
File '$NAMENODE/test-warehouse/bad_magic_number_parquet/bad_magic_number.parquet' has an invalid version number: XXXX
====