impala/testdata/workloads/functional-query/queries/QueryTest/parquet.test

====
---- QUERY
# Regression coverage on bad_parquet for two issues:
#   IMPALA-694 - data file produced by parquet-mr version 1.2.5-cdh4.5.0
#   IMPALA-720 - data file with multiple row groups
# Only the rows whose `field` equals "parquet" are expected back.
SELECT *
FROM bad_parquet
WHERE field = "parquet"
---- TYPES
string
---- RESULTS
'parquet'
'parquet'
'parquet'
'parquet'
====
---- QUERY
# Number of distinct values of `field` over the whole bad_parquet table.
SELECT COUNT(DISTINCT field)
FROM bad_parquet
---- TYPES
bigint
---- RESULTS
1005
====
---- QUERY
# The file footer of this Parquet file carries an invalid metadata size;
# the scan is expected to fail with the error below.
SELECT *
FROM bad_metadata_len
---- CATCH
Invalid metadata size in file footer
====
---- QUERY
# This Parquet file has an invalid column dict_page_offset;
# the scan is expected to fail with the error below.
SELECT *
FROM bad_dict_page_offset
---- CATCH
Column 0 has invalid data page offset (offset=100001 file_size=249)
====
---- QUERY
# This Parquet file has an invalid column total_compressed_size;
# the scan is expected to fail with the error below.
SELECT *
FROM bad_compressed_size
---- CATCH
Column 0 has invalid column offsets (offset=4, size=1000000, file_size=245)
====
---- QUERY
# Full scan of a Parquet file that contains required fields.
SELECT *
FROM kite_required_fields
---- TYPES
bigint,bigint,string,string,boolean,boolean,bigint,bigint,bigint,bigint
---- RESULTS
1,2,'foo','bar',true,false,1,2,3,4
1,NULL,'foo','NULL',true,NULL,NULL,NULL,3,4
100,NULL,'foooo','NULL',false,NULL,NULL,NULL,300,400
====
---- QUERY
# This Parquet file has an invalid magic number;
# the scan is expected to fail with the error below.
SELECT *
FROM bad_magic_number
---- CATCH
File '$NAMENODE/test-warehouse/bad_magic_number_parquet/bad_magic_number.parquet' has an invalid version number: XXXX
====
---- QUERY
# Row count over a Parquet file that spans multiple blocks (one block per node).
SELECT COUNT(*)
FROM lineitem_multiblock
---- TYPES
bigint
---- RESULTS
20000
====
---- QUERY
# Row count over a Parquet file with more than one block per node.
SELECT COUNT(*)
FROM lineitem_sixblocks
---- TYPES
bigint
---- RESULTS
40000
====
---- QUERY
# Aggregates over several columns of a Parquet file that spans multiple
# blocks (one block per node).
SELECT
  COUNT(l_comment),
  MIN(l_partkey),
  MAX(l_linenumber)
FROM lineitem_multiblock;
---- TYPES
bigint, bigint, int
---- RESULTS
20000,2,7
====
---- QUERY
# Aggregates over several columns of a Parquet file with more than one
# block per node.
SELECT
  COUNT(l_comment),
  MIN(l_partkey),
  MAX(l_linenumber)
FROM lineitem_sixblocks;
---- TYPES
bigint, bigint, int
---- RESULTS
40000,2,7
====
---- QUERY
# LIMIT query against a Parquet file that spans multiple blocks (one block
# per node): the first 20 distinct order keys, in ascending order, that are
# either below 5 or above 15000.
SELECT DISTINCT l_orderkey
FROM lineitem_multiblock
WHERE l_orderkey < 5
   OR l_orderkey > 15000
ORDER BY l_orderkey
LIMIT 20;
---- TYPES
bigint
---- RESULTS
1
2
3
4
15008
15009
15010
15011
15012
15013
15014
15015
15040
15041
15042
15043
15044
15045
15046
15047
====
---- QUERY
# LIMIT query against a Parquet file with more than one block per node:
# the first 20 distinct order keys, in ascending order, that are either
# below 5 or above 15000.
SELECT DISTINCT l_orderkey
FROM lineitem_sixblocks
WHERE l_orderkey < 5
   OR l_orderkey > 15000
ORDER BY l_orderkey
LIMIT 20;
---- TYPES
bigint
---- RESULTS
1
2
3
4
15008
15009
15010
15011
15012
15013
15014
15015
15040
15041
15042
15043
15044
15045
15046
15047
====
---- QUERY
# Exercises the batch compaction logic: a selective scan that returns a
# variety of column types. With this predicate a batch of 1024 should hold
# a couple of matching rows on average.
SELECT *
FROM alltypesagg
WHERE id % 500 = 0
---- TYPES
int,boolean,tinyint,smallint,int,bigint,float,double,string,string,timestamp,int,int,int
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/10','0',2010-01-01 00:00:00,2010,1,1
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/10','0',2010-01-01 00:00:00,2010,1,NULL
500,true,NULL,NULL,500,5000,550,5050,'01/01/10','500',2010-01-01 08:40:47.500000000,2010,1,1
500,true,NULL,NULL,500,5000,550,5050,'01/01/10','500',2010-01-01 08:40:47.500000000,2010,1,NULL
1000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/02/10','0',2010-01-02 00:00:00,2010,1,2
1000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/02/10','0',2010-01-02 00:00:00,2010,1,NULL
1500,true,NULL,NULL,500,5000,550,5050,'01/02/10','500',2010-01-02 08:40:47.500000000,2010,1,2
1500,true,NULL,NULL,500,5000,550,5050,'01/02/10','500',2010-01-02 08:40:47.500000000,2010,1,NULL
2000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/03/10','0',2010-01-03 00:00:00,2010,1,3
2000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/03/10','0',2010-01-03 00:00:00,2010,1,NULL
2500,true,NULL,NULL,500,5000,550,5050,'01/03/10','500',2010-01-03 08:40:47.500000000,2010,1,3
2500,true,NULL,NULL,500,5000,550,5050,'01/03/10','500',2010-01-03 08:40:47.500000000,2010,1,NULL
3000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/04/10','0',2010-01-04 00:00:00,2010,1,4
3000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/04/10','0',2010-01-04 00:00:00,2010,1,NULL
3500,true,NULL,NULL,500,5000,550,5050,'01/04/10','500',2010-01-04 08:40:47.500000000,2010,1,4
3500,true,NULL,NULL,500,5000,550,5050,'01/04/10','500',2010-01-04 08:40:47.500000000,2010,1,NULL
4000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/05/10','0',2010-01-05 00:00:00,2010,1,5
4000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/05/10','0',2010-01-05 00:00:00,2010,1,NULL
4500,true,NULL,NULL,500,5000,550,5050,'01/05/10','500',2010-01-05 08:40:47.500000000,2010,1,5
4500,true,NULL,NULL,500,5000,550,5050,'01/05/10','500',2010-01-05 08:40:47.500000000,2010,1,NULL
5000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/06/10','0',2010-01-06 00:00:00,2010,1,6
5000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/06/10','0',2010-01-06 00:00:00,2010,1,NULL
5500,true,NULL,NULL,500,5000,550,5050,'01/06/10','500',2010-01-06 08:40:47.500000000,2010,1,6
5500,true,NULL,NULL,500,5000,550,5050,'01/06/10','500',2010-01-06 08:40:47.500000000,2010,1,NULL
6000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/07/10','0',2010-01-07 00:00:00,2010,1,7
6000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/07/10','0',2010-01-07 00:00:00,2010,1,NULL
6500,true,NULL,NULL,500,5000,550,5050,'01/07/10','500',2010-01-07 08:40:47.500000000,2010,1,7
6500,true,NULL,NULL,500,5000,550,5050,'01/07/10','500',2010-01-07 08:40:47.500000000,2010,1,NULL
7000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/08/10','0',2010-01-08 00:00:00,2010,1,8
7000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/08/10','0',2010-01-08 00:00:00,2010,1,NULL
7500,true,NULL,NULL,500,5000,550,5050,'01/08/10','500',2010-01-08 08:40:47.500000000,2010,1,8
7500,true,NULL,NULL,500,5000,550,5050,'01/08/10','500',2010-01-08 08:40:47.500000000,2010,1,NULL
8000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/09/10','0',2010-01-09 00:00:00,2010,1,9
8000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/09/10','0',2010-01-09 00:00:00,2010,1,NULL
8500,true,NULL,NULL,500,5000,550,5050,'01/09/10','500',2010-01-09 08:40:47.500000000,2010,1,9
8500,true,NULL,NULL,500,5000,550,5050,'01/09/10','500',2010-01-09 08:40:47.500000000,2010,1,NULL
9000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/10/10','0',2010-01-10 00:00:00,2010,1,10
9000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/10/10','0',2010-01-10 00:00:00,2010,1,NULL
9500,true,NULL,NULL,500,5000,550,5050,'01/10/10','500',2010-01-10 08:40:47.500000000,2010,1,10
9500,true,NULL,NULL,500,5000,550,5050,'01/10/10','500',2010-01-10 08:40:47.500000000,2010,1,NULL
====
---- QUERY
# Exercises the batch compaction logic: a selective scan that returns a
# variety of column types. With this predicate most batches should be empty.
SELECT *
FROM alltypesagg
WHERE id = 5000
---- TYPES
int,boolean,tinyint,smallint,int,bigint,float,double,string,string,timestamp,int,int,int
---- RESULTS: VERIFY_IS_EQUAL_SORTED
5000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/06/10','0',2010-01-06 00:00:00,2010,1,6
5000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/06/10','0',2010-01-06 00:00:00,2010,1,NULL
====