mirror of
https://github.com/apache/impala.git
synced 2026-01-08 03:02:48 -05:00
In a recent patch (IMPALA-5036) a bug was introduced where a count(*) query with a group by a string partition column returned incorrect results. Data was being written into the tuple at an incorrect offset. Testing: - Added an end to end test where we are selecting from a table partitioned by string. Change-Id: I225547574c2b2259ca81cb642d082e151f3bed6b Reviewed-on: http://gerrit.cloudera.org:8080/7481 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Impala Public Jenkins
140 lines
2.8 KiB
Plaintext
140 lines
2.8 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Tests the correctness of the Parquet count(*) optimization.
|
|
select count(1)
|
|
from functional_parquet.alltypes
|
|
---- RESULTS
|
|
7300
|
|
---- TYPES
|
|
bigint
|
|
=====
|
|
---- QUERY
|
|
# Parquet count(*) optimization with predicates on the partition columns.
|
|
select count(1)
|
|
from functional_parquet.alltypes where year < 2010 and month > 8
|
|
---- RESULTS
|
|
1220
|
|
---- TYPES
|
|
bigint
|
|
=====
|
|
---- QUERY
|
|
# Parquet count(*) optimization with group by partition columns.
|
|
select year, month, count(1)
|
|
from functional_parquet.alltypes group by year, month
|
|
---- RESULTS
|
|
2009,1,310
|
|
2009,2,280
|
|
2009,3,310
|
|
2009,4,300
|
|
2009,5,310
|
|
2009,6,300
|
|
2009,7,310
|
|
2009,8,310
|
|
2009,9,300
|
|
2009,10,310
|
|
2009,11,300
|
|
2009,12,310
|
|
2010,1,310
|
|
2010,2,280
|
|
2010,3,310
|
|
2010,4,300
|
|
2010,5,310
|
|
2010,6,300
|
|
2010,7,310
|
|
2010,8,310
|
|
2010,9,300
|
|
2010,10,310
|
|
2010,11,300
|
|
2010,12,310
|
|
---- TYPES
|
|
int, int, bigint
|
|
=====
|
|
---- QUERY
|
|
# Parquet count(*) optimization with both group by and predicates on partition columns.
|
|
select count(1)
|
|
from functional_parquet.alltypes where year < 2010 and month > 8
|
|
group by month
|
|
---- RESULTS
|
|
310
|
|
300
|
|
310
|
|
300
|
|
---- TYPES
|
|
bigint
|
|
=====
|
|
---- QUERY
|
|
# Parquet count(*) optimization with the result going into a join.
|
|
select x.bigint_col from functional.alltypes x
|
|
inner join (
|
|
select count(1) as a from functional_parquet.alltypes group by year
|
|
) t on x.id = t.a;
|
|
---- RESULTS
|
|
0
|
|
0
|
|
---- TYPES
|
|
bigint
|
|
=====
|
|
---- QUERY
|
|
# Parquet count(*) optimization with the agg function in the having clause.
|
|
select 1 from functional_parquet.alltypes having count(*) > 1
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
tinyint
|
|
====
|
|
---- QUERY
|
|
# Verify that 0 is returned for count(*) on an empty table.
|
|
select count(1) from functional_parquet.emptytable
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
bigint
|
|
=====
|
|
---- QUERY
|
|
# Verify that 0 is returned when all partitions are pruned.
|
|
select count(1) from functional_parquet.alltypes where year = -1
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
bigint
|
|
=====
|
|
---- QUERY
|
|
# Test different row group size combinations.
|
|
select count(*) from functional_parquet.lineitem_multiblock
|
|
union all
|
|
select count(*) from functional_parquet.lineitem_multiblock_one_row_group
|
|
union all
|
|
select count(*) from functional_parquet.lineitem_sixblocks
|
|
union all
|
|
select count(*) from tpch_parquet.lineitem
|
|
---- RESULTS
|
|
20000
|
|
40000
|
|
40000
|
|
6001215
|
|
---- TYPES
|
|
bigint
|
|
=====
|
|
---- QUERY
|
|
# IMPALA-5679: Count(*) with group by on a string partition column.
|
|
drop table if exists $DATABASE.string_partitioned_table;
|
|
create table $DATABASE.string_partitioned_table (int_col integer)
|
|
partitioned by (string_col STRING) stored as parquet;
|
|
insert into $DATABASE.string_partitioned_table partition(string_col)
|
|
select int_col, string_col from functional.alltypes;
|
|
select string_col, count(*) from $DATABASE.string_partitioned_table group by string_col;
|
|
---- RESULTS
|
|
'0',730
|
|
'1',730
|
|
'2',730
|
|
'3',730
|
|
'4',730
|
|
'5',730
|
|
'6',730
|
|
'7',730
|
|
'8',730
|
|
'9',730
|
|
---- TYPES
|
|
string, bigint
|
|
=====
|