Files
impala/testdata/workloads/functional-query/queries/QueryTest/parquet.test
Tim Armstrong b4343895d8 IMPALA-4923: reduce memory transfer for selective scans
Most of the code changes are to restructure things so that the
scratch batch's tuple buffer is stored in a separate MemPool
from auxiliary memory such as decompression buffers. This part
of the change does not change the behaviour of the scanner in
itself, but allows us to recycle the tuple buffer without holding
onto unused auxiliary memory.

The optimisation is implemented in TryCompact(): if enough rows
were filtered out during the copy from the scratch batch to the
output batch, the fixed-length portions of the surviving rows
(if any) are copied to a new, smaller, buffer, and the original,
larger, buffer is reused for the next scratch batch.

Previously the large buffer was always attached to the output batch,
so a large buffer was transferred between threads for every scratch
batch processed. In combination with the decompression buffer change
in IMPALA-5304, this means that in many cases selective scans don't
produce nearly as many empty or near-empty batches and do not attach
nearly as much memory to each batch.

Performance:
Even on an 8-core machine I see some speedup on selective scans.
Profiling with "perf top" also showed that time in TCMalloc
was reduced - it went from several % of CPU time to a minimal
amount.

Running TPC-H on the same machine showed a ~5% overall improvement
and no regressions. E.g. Q6 got 20-25% faster.

I hope to do some additional cluster benchmarking on systems
with more cores to verify that the severe performance problems
there are fixed, but in the meantime it seems like we have enough
evidence that it will at least improve things.

Testing:
Add a couple of selective scans that exercise the new code paths.

Change-Id: I3773dc63c498e295a2c1386a15c5e69205e747ea
Reviewed-on: http://gerrit.cloudera.org:8080/6949
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Impala Public Jenkins
2017-05-25 02:55:36 +00:00

201 lines
7.2 KiB
Plaintext

====
---- QUERY
# Regression coverage for two issues:
# IMPALA-694: data file produced by parquet-mr version 1.2.5-cdh4.5.0
# IMPALA-720: data file with multiple row groups
# Filters bad_parquet with a string equality predicate; four matching rows are expected.
SELECT * from bad_parquet where field = "parquet"
---- TYPES
string
---- RESULTS
'parquet'
'parquet'
'parquet'
'parquet'
====
---- QUERY
# Distinct-value count over the 'field' column of bad_parquet.
SELECT count(distinct field) from bad_parquet
---- TYPES
bigint
---- RESULTS
1005
====
---- QUERY
# Parquet file with invalid metadata size in the file footer.
# The CATCH section holds the error text this query is expected to fail with.
SELECT * from bad_metadata_len
---- CATCH
Invalid metadata size in file footer
====
---- QUERY
# Parquet file with invalid column dict_page_offset.
# The expected error reports an offset (100001) larger than the reported file_size (249).
# NOTE(review): the table name refers to dict_page_offset, but the expected error text
# mentions the *data* page offset - confirm this is the check the test intends to hit.
SELECT * from bad_dict_page_offset
---- CATCH
Column 0 has invalid data page offset (offset=100001 file_size=249)
====
---- QUERY
# Parquet file with invalid column total_compressed_size.
# The expected error reports a column size (1000000) larger than the reported
# file_size (245).
SELECT * from bad_compressed_size
---- CATCH
Column 0 has invalid column offsets (offset=4, size=1000000, file_size=245)
====
---- QUERY
# Parquet file with required fields.
# Reads every column (ten columns); three rows are expected, and some columns in the
# expected output still contain NULL values.
select * from kite_required_fields
---- TYPES
bigint,bigint,string,string,boolean,boolean,bigint,bigint,bigint,bigint
---- RESULTS
1,2,'foo','bar',true,false,1,2,3,4
1,NULL,'foo','NULL',true,NULL,NULL,NULL,3,4
100,NULL,'foooo','NULL',false,NULL,NULL,NULL,300,400
====
---- QUERY
# Parquet file with invalid magic number
# Note that the expected error text describes this as an invalid *version* number.
# NOTE(review): $NAMENODE looks like a placeholder filled in by the test harness - confirm.
SELECT * from bad_magic_number
---- CATCH
File '$NAMENODE/test-warehouse/bad_magic_number_parquet/bad_magic_number.parquet' has an invalid version number: XXXX
====
---- QUERY
# count(*) query on parquet file with multiple blocks (one block per node)
# Expects 20000 rows in total.
SELECT count(*) from lineitem_multiblock
---- TYPES
bigint
---- RESULTS
20000
====
---- QUERY
# count(*) query on parquet file with more than one block per node
# Expects 40000 rows in total.
SELECT count(*) from lineitem_sixblocks
---- TYPES
bigint
---- RESULTS
40000
====
---- QUERY
# Select multiple columns from parquet file with multiple blocks (one block per node)
# Aggregates three different columns (l_comment, l_partkey, l_linenumber) in a single
# query; the expected count(l_comment) is 20000.
SELECT count(l_comment), min(l_partkey), max(l_linenumber) from lineitem_multiblock;
---- TYPES
bigint, bigint, int
---- RESULTS
20000,2,7
====
---- QUERY
# Select multiple columns from parquet file with more than one block per node
# Aggregates three different columns (l_comment, l_partkey, l_linenumber) in a single
# query; the expected count(l_comment) is 40000.
SELECT count(l_comment), min(l_partkey), max(l_linenumber) from lineitem_sixblocks;
---- TYPES
bigint, bigint, int
---- RESULTS
40000,2,7
====
---- QUERY
# Test limit queries on parquet with multiple blocks (one block per node)
# The predicate keeps keys from both ends of the range (< 5 or > 15000). Ordering by
# l_orderkey before applying the limit makes the 20 returned keys deterministic.
select distinct l_orderkey from lineitem_multiblock where
l_orderkey < 5 or l_orderkey > 15000 order by l_orderkey limit 20;
---- TYPES
bigint
---- RESULTS
1
2
3
4
15008
15009
15010
15011
15012
15013
15014
15015
15040
15041
15042
15043
15044
15045
15046
15047
====
---- QUERY
# Test limit queries on parquet with more than one block per node
# The predicate keeps keys from both ends of the range (< 5 or > 15000). Ordering by
# l_orderkey before applying the limit makes the 20 returned keys deterministic.
select distinct l_orderkey from lineitem_sixblocks where
l_orderkey < 5 or l_orderkey > 15000 order by l_orderkey limit 20;
---- TYPES
bigint
---- RESULTS
1
2
3
4
15008
15009
15010
15011
15012
15013
15014
15015
15040
15041
15042
15043
15044
15045
15046
15047
====
---- QUERY
# Test batch compaction logic with selective scan returning a variety of column types.
# On average there should be a couple of rows per batch of 1024.
# The predicate keeps only ids that are multiples of 500; each matching id appears
# twice in the expected output, once with a NULL in the last column.
# NOTE(review): the query has no ORDER BY; VERIFY_IS_EQUAL_SORTED presumably makes the
# comparison independent of row order - confirm against the test harness.
select * from alltypesagg where id % 500 = 0
---- TYPES
int,boolean,tinyint,smallint,int,bigint,float,double,string,string,timestamp,int,int,int
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/10','0',2010-01-01 00:00:00,2010,1,1
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/10','0',2010-01-01 00:00:00,2010,1,NULL
500,true,NULL,NULL,500,5000,550,5050,'01/01/10','500',2010-01-01 08:40:47.500000000,2010,1,1
500,true,NULL,NULL,500,5000,550,5050,'01/01/10','500',2010-01-01 08:40:47.500000000,2010,1,NULL
1000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/02/10','0',2010-01-02 00:00:00,2010,1,2
1000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/02/10','0',2010-01-02 00:00:00,2010,1,NULL
1500,true,NULL,NULL,500,5000,550,5050,'01/02/10','500',2010-01-02 08:40:47.500000000,2010,1,2
1500,true,NULL,NULL,500,5000,550,5050,'01/02/10','500',2010-01-02 08:40:47.500000000,2010,1,NULL
2000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/03/10','0',2010-01-03 00:00:00,2010,1,3
2000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/03/10','0',2010-01-03 00:00:00,2010,1,NULL
2500,true,NULL,NULL,500,5000,550,5050,'01/03/10','500',2010-01-03 08:40:47.500000000,2010,1,3
2500,true,NULL,NULL,500,5000,550,5050,'01/03/10','500',2010-01-03 08:40:47.500000000,2010,1,NULL
3000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/04/10','0',2010-01-04 00:00:00,2010,1,4
3000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/04/10','0',2010-01-04 00:00:00,2010,1,NULL
3500,true,NULL,NULL,500,5000,550,5050,'01/04/10','500',2010-01-04 08:40:47.500000000,2010,1,4
3500,true,NULL,NULL,500,5000,550,5050,'01/04/10','500',2010-01-04 08:40:47.500000000,2010,1,NULL
4000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/05/10','0',2010-01-05 00:00:00,2010,1,5
4000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/05/10','0',2010-01-05 00:00:00,2010,1,NULL
4500,true,NULL,NULL,500,5000,550,5050,'01/05/10','500',2010-01-05 08:40:47.500000000,2010,1,5
4500,true,NULL,NULL,500,5000,550,5050,'01/05/10','500',2010-01-05 08:40:47.500000000,2010,1,NULL
5000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/06/10','0',2010-01-06 00:00:00,2010,1,6
5000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/06/10','0',2010-01-06 00:00:00,2010,1,NULL
5500,true,NULL,NULL,500,5000,550,5050,'01/06/10','500',2010-01-06 08:40:47.500000000,2010,1,6
5500,true,NULL,NULL,500,5000,550,5050,'01/06/10','500',2010-01-06 08:40:47.500000000,2010,1,NULL
6000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/07/10','0',2010-01-07 00:00:00,2010,1,7
6000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/07/10','0',2010-01-07 00:00:00,2010,1,NULL
6500,true,NULL,NULL,500,5000,550,5050,'01/07/10','500',2010-01-07 08:40:47.500000000,2010,1,7
6500,true,NULL,NULL,500,5000,550,5050,'01/07/10','500',2010-01-07 08:40:47.500000000,2010,1,NULL
7000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/08/10','0',2010-01-08 00:00:00,2010,1,8
7000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/08/10','0',2010-01-08 00:00:00,2010,1,NULL
7500,true,NULL,NULL,500,5000,550,5050,'01/08/10','500',2010-01-08 08:40:47.500000000,2010,1,8
7500,true,NULL,NULL,500,5000,550,5050,'01/08/10','500',2010-01-08 08:40:47.500000000,2010,1,NULL
8000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/09/10','0',2010-01-09 00:00:00,2010,1,9
8000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/09/10','0',2010-01-09 00:00:00,2010,1,NULL
8500,true,NULL,NULL,500,5000,550,5050,'01/09/10','500',2010-01-09 08:40:47.500000000,2010,1,9
8500,true,NULL,NULL,500,5000,550,5050,'01/09/10','500',2010-01-09 08:40:47.500000000,2010,1,NULL
9000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/10/10','0',2010-01-10 00:00:00,2010,1,10
9000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/10/10','0',2010-01-10 00:00:00,2010,1,NULL
9500,true,NULL,NULL,500,5000,550,5050,'01/10/10','500',2010-01-10 08:40:47.500000000,2010,1,10
9500,true,NULL,NULL,500,5000,550,5050,'01/10/10','500',2010-01-10 08:40:47.500000000,2010,1,NULL
====
---- QUERY
# Test batch compaction logic with selective scan returning a variety of column types.
# Most batches should be empty.
# Only two rows are expected to match; they differ only in the last column, which is
# NULL in one of them.
select * from alltypesagg where id = 5000
---- TYPES
int,boolean,tinyint,smallint,int,bigint,float,double,string,string,timestamp,int,int,int
---- RESULTS: VERIFY_IS_EQUAL_SORTED
5000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/06/10','0',2010-01-06 00:00:00,2010,1,6
5000,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/06/10','0',2010-01-06 00:00:00,2010,1,NULL
====