mirror of
https://github.com/apache/impala.git
synced 2026-01-29 12:00:20 -05:00
- test_parquet_stats.py was missing and the tests weren't run during GVO. - The tests in parquet_stats.test assume that the queries were executed in a single fragment, so they now run with 'num_nodes = 1'. - Parquet columns are now resolved correctly. - Parquet files with missing columns are now handled correctly. - Predicates with implicit casts can now be evaluated against parquet::Statistics. - This change also cleans up some old friend declarations I came across. Change-Id: I54c205fad7afc4a0b0a7d0f654859de76db29a02 Reviewed-on: http://gerrit.cloudera.org:8080/6147 Reviewed-by: Lars Volker <lv@cloudera.com> Tested-by: Impala Public Jenkins
282 lines
8.0 KiB
Plaintext
282 lines
8.0 KiB
Plaintext
====
---- QUERY
# Baseline pruning check: no rows are expected for int_col < 0, and the profile must
# report all 4 row groups as filtered using statistics.
select id, bool_col from functional_parquet.alltypessmall where int_col < 0
---- RESULTS
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# With explain_level=2 the plan must list the predicate under
# 'parquet statistics predicates'.
set explain_level=2;
explain select id, bool_col from functional_parquet.alltypessmall where int_col < 0;
---- RESULTS: VERIFY_IS_SUBSET
' parquet statistics predicates: int_col < 0'
====
---- QUERY
# The '< 0' predicate is repeated below for every numeric column type. Each case expects
# a count of 0 and all 4 row groups to be filtered using statistics.
select count(*) from functional_parquet.alltypessmall where tinyint_col < 0
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# smallint variant of the '< 0' check.
select count(*) from functional_parquet.alltypessmall where smallint_col < 0
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# int variant of the '< 0' check.
select count(*) from functional_parquet.alltypessmall where int_col < 0
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# bigint variant of the '< 0' check.
select count(*) from functional_parquet.alltypessmall where bigint_col < 0
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# float variant of the '< 0' check.
select count(*) from functional_parquet.alltypessmall where float_col < 0
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# double variant of the '< 0' check.
select count(*) from functional_parquet.alltypessmall where double_col < 0
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# Test with inverted predicate
# The literal is on the left-hand side (-1 > int_col); no rows are expected and all 4 row
# groups must still be filtered using statistics.
select id, bool_col from functional_parquet.alltypessmall where -1 > int_col
---- RESULTS
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# Upper-bound counterpart of the '< 0' checks: a '>' predicate per numeric column type.
# Each case expects no matching rows and all 4 row groups to be filtered using statistics.
select count(*) from functional_parquet.alltypessmall where tinyint_col > 9
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# smallint variant of the '>' check.
select count(*) from functional_parquet.alltypessmall where smallint_col > 9
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# int variant of the '>' check; selects columns instead of a count, so RESULTS is empty.
select id, bool_col from functional_parquet.alltypessmall where int_col > 9
---- RESULTS
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# bigint variant of the '>' check.
select count(*) from functional_parquet.alltypessmall where bigint_col > 90
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# float variant of the '>' check, using a decimal literal.
select count(*) from functional_parquet.alltypessmall where float_col > 9.9
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# double variant of the '>' check.
select count(*) from functional_parquet.alltypessmall where double_col > 99
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# '>=' with a bound that matches no rows: all 4 row groups are expected to be filtered.
select count(*) from functional_parquet.alltypessmall where tinyint_col >= 10
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# Inclusive bound that does match rows (12 expected): no row group may be filtered.
select count(*) from functional_parquet.alltypessmall where tinyint_col <= 0
---- RESULTS
12
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 0 .*
====
---- QUERY
# Inclusive bound that does match rows (8 expected): no row group may be filtered.
select count(*) from functional_parquet.alltypessmall where tinyint_col >= 9
---- RESULTS
8
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 0 .*
====
---- QUERY
# Equality predicate with a value that matches no rows: all 4 row groups are expected to
# be filtered using statistics.
select count(*) from functional_parquet.alltypessmall where tinyint_col = -1
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# Same equality check with the value 10; again no rows and all 4 row groups filtered.
select count(*) from functional_parquet.alltypessmall where tinyint_col = 10
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# With explain_level=2 the plan must list the equality predicate under
# 'parquet statistics predicates'.
set explain_level=2;
explain select count(*) from functional_parquet.alltypessmall where tinyint_col = 10
---- RESULTS: VERIFY_IS_SUBSET
' parquet statistics predicates: tinyint_col = 10'
====
---- QUERY
# Two-sided range on id: 51 rows are expected and exactly 1 of the 4 row groups is
# expected to be filtered, i.e. pruning is applied per row group rather than all-or-nothing.
select count(*) from functional_parquet.alltypessmall where id >= 30 and id <= 80
---- RESULTS
51
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 1 .*
====
---- QUERY
# Mix with partitioning columns
# The extra conjunct on year must not prevent int_col < 0 from filtering all 4 row groups.
select count(*) from functional_parquet.alltypessmall where int_col < 0 and year < 2012
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# Test that adding a column without stats will not disable stats-based pruning.
select count(*) from functional_parquet.alltypessmall where int_col < 0 and string_col < ""
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# The bound is a constant expression (3 - 3) rather than a literal; no rows are expected
# and all 4 row groups must be filtered using statistics.
select id, bool_col from functional_parquet.alltypessmall where int_col < 3 - 3
---- RESULTS
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# NOTE(review): this case is identical to the preceding one (same query, same
# expectations) - confirm whether a different predicate was intended here.
select id, bool_col from functional_parquet.alltypessmall where int_col < 3 - 3
---- RESULTS
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# Test that without expr rewrite and thus without constant folding, constant exprs still
# can be used to prune row groups.
set enable_expr_rewrites=0;
select id, bool_col from functional_parquet.alltypessmall where int_col < 3 - 3
---- RESULTS
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# Constant expression (5 + 5) on the left-hand side of the comparison; no rows are
# expected and all 4 row groups must be filtered using statistics.
select id, bool_col from functional_parquet.alltypessmall where 5 + 5 < int_col
---- RESULTS
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# Test that without expr rewrite and thus without constant folding, constant exprs still
# can be used to prune row groups.
set enable_expr_rewrites=0;
select id, bool_col from functional_parquet.alltypessmall where 5 + 5 < int_col
---- RESULTS
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# Test name based column resolution
# The table is written with the columns of functional_parquet.alltypessmall, then its
# schema is replaced with a shorter column list that starts with int_col and ends with id.
create table name_resolve stored as parquet as select * from functional_parquet.alltypessmall;
alter table name_resolve replace columns (int_col int, bool_col boolean, tinyint_col tinyint, smallint_col smallint, id int);
set parquet_fallback_schema_resolution=NAME;
# If this picks up the stats from int_col, then it will filter all row groups and return
# an incorrect result.
select count(*) from name_resolve where id > 10;
---- RESULTS
89
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 1 .*
row_regex: .*NumStatsFilteredRowGroups: 0 .*
====
---- QUERY
# Query that has an implicit cast to a larger integer type
# The predicate must still be evaluated against statistics: all 4 row groups filtered.
select count(*) from functional_parquet.alltypessmall where tinyint_col > 1000000000000
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 4 .*
====
---- QUERY
# Predicates with explicit casts are not supported when evaluating parquet::Statistics.
# Hence no row group is expected to be filtered even though the count is 0.
select count(*) from functional_parquet.alltypessmall where '0' > cast(tinyint_col as string)
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 4 .*
row_regex: .*NumStatsFilteredRowGroups: 0 .*
====
---- QUERY
# Explicit casts between numerical types can violate the transitivity of "min()", so they
# are not supported when evaluating parquet::Statistics.
# Runs against functional_parquet.alltypes (24 row groups expected), none of them filtered.
select count(*) from functional_parquet.alltypes where cast(id as tinyint) < 10;
---- RESULTS
3878
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 24 .*
row_regex: .*NumStatsFilteredRowGroups: 0 .*
====
---- QUERY
# Predicate on pos of the nested collection complextypestbl.int_array: 9 rows are
# expected and neither of the 2 row groups may be filtered.
select count(*) from functional_parquet.complextypestbl.int_array where pos < 5;
---- RESULTS
9
---- RUNTIME_PROFILE
row_regex: .*NumRowGroups: 2 .*
row_regex: .*NumStatsFilteredRowGroups: 0 .*
====