Files
impala/testdata/workloads/functional-query/queries/QueryTest/parquet.test
Sailesh Mukil 7778b3ded5 IMPALA-1881: Maximize data locality when scanning Parquet files with multiple row groups.
Impala supports reading Parquet files with multiple row groups but
with possible performance degradation due to remote reads. This patch
maximizes scan locality by allowing multiple impalads to scan the
row groups in their local splits. Each impalad starts a new scan range
for each split local to it if that split contains row group(s) that
need to be scanned.

Change-Id: Iaecc5fb8e89364780bc59dbfa9ae51d0d124d16e
Reviewed-on: http://gerrit.cloudera.org:8080/908
Reviewed-by: Sailesh Mukil <sailesh@cloudera.com>
Tested-by: Internal Jenkins
2015-10-05 11:30:39 -07:00

99 lines
2.1 KiB
Plaintext

====
---- QUERY
# IMPALA-694: the data file was produced by parquet-mr version 1.2.5-cdh4.5.0.
# IMPALA-720: the data file contains multiple row groups.
# The string literal is single-quoted (standard SQL), matching the quoting
# used for the expected values in RESULTS.
SELECT *
FROM bad_parquet
WHERE field = 'parquet'
---- TYPES
string
---- RESULTS
'parquet'
'parquet'
'parquet'
'parquet'
====
---- QUERY
# Counts the distinct values of 'field' across all of bad_parquet.
SELECT COUNT(DISTINCT field)
FROM bad_parquet
---- TYPES
bigint
---- RESULTS
1005
====
---- QUERY
# The footer of this Parquet file carries an invalid metadata size.
# CATCH holds the error text the query is expected to produce.
SELECT *
FROM bad_metadata_len
---- CATCH
Invalid metadata size in file footer
====
---- QUERY
# A column in this Parquet file has an invalid dict_page_offset.
# CATCH holds the error text the query is expected to produce.
SELECT *
FROM bad_dict_page_offset
---- CATCH
Column 0 has invalid column offsets (offset=10000, size=47, file_size=249)
====
---- QUERY
# A column in this Parquet file has an invalid total_compressed_size.
# CATCH holds the error text the query is expected to produce.
SELECT *
FROM bad_compressed_size
---- CATCH
Column 0 has invalid column offsets (offset=4, size=1000000, file_size=245)
====
---- QUERY
# Reads every column of a Parquet file that has required fields.
SELECT *
FROM kite_required_fields
---- TYPES
bigint,bigint,string,string,boolean,boolean,bigint,bigint,bigint,bigint
---- RESULTS
1,2,'foo','bar',true,false,1,2,3,4
1,NULL,'foo','NULL',true,NULL,NULL,NULL,3,4
100,NULL,'foooo','NULL',false,NULL,NULL,NULL,300,400
====
---- QUERY
# This Parquet file has an invalid magic number.
# CATCH holds the error text the query is expected to produce; note that the
# message reports it as an invalid version number.
SELECT *
FROM bad_magic_number
---- CATCH
File '$NAMENODE/test-warehouse/bad_magic_number_parquet/bad_magic_number.parquet' has an invalid version number: XXXX
====
---- QUERY
# Row count over a Parquet file that spans multiple blocks.
SELECT COUNT(*)
FROM lineitem_multiblock
---- TYPES
bigint
---- RESULTS
20000
====
---- QUERY
# Aggregates over several columns of a Parquet file that spans multiple blocks.
# TYPES is written without spaces after the commas, like the other TYPES
# sections in this file.
SELECT COUNT(l_comment), MIN(l_partkey), MAX(l_linenumber)
FROM lineitem_multiblock
---- TYPES
bigint,bigint,int
---- RESULTS
20000,2,7
====
---- QUERY
# LIMIT query over a Parquet file that spans multiple blocks.
# The table name is left unqualified, like the other lineitem_multiblock
# queries in this file, so it resolves against the database the test runs in.
# ORDER BY on the selected DISTINCT key keeps the 20 returned rows
# deterministic.
SELECT DISTINCT l_orderkey
FROM lineitem_multiblock
WHERE l_orderkey < 5 OR l_orderkey > 15000
ORDER BY l_orderkey
LIMIT 20
---- TYPES
bigint
---- RESULTS
1
2
3
4
15008
15009
15010
15011
15012
15013
15014
15015
15040
15041
15042
15043
15044
15045
15046
15047
====