mirror of
https://github.com/apache/impala.git
synced 2026-01-06 06:01:03 -05:00
Quick fix of Parquet write path until the Parquet community agrees on the ordering of floating point numbers. The behavior follows the way fmax()/fmin() works, ie. Impala will only write NaN into the stats when all the values are NaNs. This behavior is aligned with the quick fix of Parquet-CPP. Added e2e tests as well. Change-Id: I3957806948f7c661af4be5495f2ec92d1e9fc9d6 Reviewed-on: http://gerrit.cloudera.org:8080/9381 Reviewed-by: Lars Volker <lv@cloudera.com> Tested-by: Impala Public Jenkins
630 lines
18 KiB
Plaintext
630 lines
18 KiB
Plaintext
====
|
|
---- QUERY
|
|
select id, bool_col from functional_parquet.alltypessmall where int_col < 0
|
|
---- RESULTS
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
set explain_level=2;
|
|
explain select id, bool_col from functional_parquet.alltypessmall where int_col < 0;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' parquet statistics predicates: int_col < 0'
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where tinyint_col < 0
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where smallint_col < 0
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where int_col < 0
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where bigint_col < 0
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where float_col < 0
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where double_col < 0
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
# Test with inverted predicate
|
|
select id, bool_col from functional_parquet.alltypessmall where -1 > int_col
|
|
---- RESULTS
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where tinyint_col > 9
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where smallint_col > 9
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select id, bool_col from functional_parquet.alltypessmall where int_col > 9
|
|
---- RESULTS
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where bigint_col > 90
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where float_col > 9.9
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where double_col > 99
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where tinyint_col >= 10
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where tinyint_col <= 0
|
|
---- RESULTS
|
|
12
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where tinyint_col >= 9
|
|
---- RESULTS
|
|
8
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where tinyint_col = -1
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where tinyint_col = 10
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
set explain_level=2;
|
|
explain select count(*) from functional_parquet.alltypessmall where tinyint_col = 10
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
' parquet statistics predicates: tinyint_col = 10'
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where id >= 30 and id <= 80
|
|
---- RESULTS
|
|
51
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Mix with partitioning columns
|
|
select count(*) from functional_parquet.alltypessmall where int_col < 0 and year < 2012
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select id, bool_col from functional_parquet.alltypessmall where int_col < 3 - 3
|
|
---- RESULTS
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select id, bool_col from functional_parquet.alltypessmall where int_col < 3 - 3
|
|
---- RESULTS
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
# Test that without expr rewrite and thus without constant folding, constant exprs still
|
|
# can be used to prune row groups.
|
|
set enable_expr_rewrites=0;
|
|
select id, bool_col from functional_parquet.alltypessmall where int_col < 3 - 3
|
|
---- RESULTS
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select id, bool_col from functional_parquet.alltypessmall where 5 + 5 < int_col
|
|
---- RESULTS
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
# Test that without expr rewrite and thus without constant folding, constant exprs still
|
|
# can be used to prune row groups.
|
|
set enable_expr_rewrites=0;
|
|
select id, bool_col from functional_parquet.alltypessmall where 5 + 5 < int_col
|
|
---- RESULTS
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
# Test name based column resolution
|
|
create table name_resolve stored as parquet as select * from functional_parquet.alltypessmall;
|
|
alter table name_resolve replace columns (int_col int, bool_col boolean, tinyint_col tinyint, smallint_col smallint, id int);
|
|
set parquet_fallback_schema_resolution=NAME;
|
|
# If this picks up the stats from int_col, then it will filter all row groups and return
|
|
# an incorrect result.
|
|
select count(*) from name_resolve where id > 10;
|
|
---- RESULTS
|
|
89
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 1
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
# Query that has an implicit cast to a larger integer type
|
|
select count(*) from functional_parquet.alltypessmall where tinyint_col > 1000000000000
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
# Predicates with explicit casts are not supported when evaluating parquet::Statistics.
|
|
select count(*) from functional_parquet.alltypessmall where '0' > cast(tinyint_col as string)
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
# Explicit casts between numerical types can violate the transitivity of "min()", so they
|
|
# are not supported when evaluating parquet::Statistics.
|
|
select count(*) from functional_parquet.alltypes where cast(id as tinyint) < 10;
|
|
---- RESULTS
|
|
3878
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 24
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.complextypestbl.int_array where pos < 5;
|
|
---- RESULTS
|
|
9
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 2
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
# Test the conversion of constant IN lists to min/max predicats
|
|
select count(*) from functional_parquet.alltypes where int_col in (-1,-2,-3,-4);
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 24
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 24
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypes where id IN (1,25,49);
|
|
---- RESULTS
|
|
3
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 24
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 23
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where string_col < "0"
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where string_col <= "/"
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where string_col < "1"
|
|
---- RESULTS
|
|
12
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where string_col >= "9"
|
|
---- RESULTS
|
|
8
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where string_col > ":"
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where timestamp_col < "2009-01-01 00:00:00"
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where timestamp_col <= "2009-01-01 00:00:00"
|
|
---- RESULTS
|
|
1
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where timestamp_col = "2009-01-01 00:00:00"
|
|
---- RESULTS
|
|
1
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where timestamp_col > "2009-04-03 00:24:00.96"
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where timestamp_col >= "2009-04-03 00:24:00.96"
|
|
---- RESULTS
|
|
1
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.alltypessmall where timestamp_col = "2009-04-03 00:24:00.96"
|
|
---- RESULTS
|
|
1
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 4
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.decimal_tbl where d1 < 1234
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 1
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.decimal_tbl where d3 < 1.23456789
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 1
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.decimal_tbl where d3 = 1.23456788
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 1
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.decimal_tbl where d3 = 1.23456789
|
|
---- RESULTS
|
|
1
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 1
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.decimal_tbl where d4 > 0.123456789
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 1
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.decimal_tbl where d4 >= 0.12345678
|
|
---- RESULTS
|
|
5
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 1
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
select count(*) from functional_parquet.decimal_tbl where d4 >= 0.12345679
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 1
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test that stats are disabled for CHAR type columns.
|
|
create table chars (id int, c char(4)) stored as parquet;
|
|
insert into chars values (1, cast("abaa" as char(4))), (2, cast("abab" as char(4)));
|
|
select count(*) from chars;
|
|
---- RESULTS
|
|
2
|
|
====
|
|
---- QUERY
|
|
select count(*) from chars where c <= "aaaa"
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 1
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
# IMPALA-4988: Test that stats support can be disabled using the parquet_read_statistics
|
|
# query option.
|
|
set parquet_read_statistics=0;
|
|
select count(*) from functional_parquet.alltypes where id < 0;
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 24
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
# Check that all the row groups are skipped using null_count stat
|
|
create table table_for_null_count_test (i int, j int) stored as parquet;
|
|
insert into table_for_null_count_test values (1, NULL), (2, NULL), (3, NULL);
|
|
select count(*) from table_for_null_count_test where j < 3;
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 1
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Insert another row group where not all the 'j' values are NULL
|
|
insert into table_for_null_count_test values (4, 1), (5, NULL);
|
|
select i from table_for_null_count_test where j < 3;
|
|
---- RESULTS
|
|
4
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 2
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Turning off parquet stats and verifying that no row groups are skipped
|
|
set PARQUET_READ_STATISTICS=0;
|
|
create table table_for_null_count_test2 (i int, j int) stored as parquet;
|
|
insert into table_for_null_count_test2 values (1, NULL), (2, NULL), (3, NULL);
|
|
select count(*) from table_for_null_count_test2 where j < 3;
|
|
---- RESULTS
|
|
0
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 1
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
# IMPALA-6527: NaN values lead to incorrect filtering.
|
|
# When the first value is NaN in a column chunk, Impala chose it as min_value and
|
|
# max_value for statistics. Test if it is no longer the case.
|
|
create table test_nan(val double) stored as parquet;
|
|
insert into test_nan values (cast('NaN' as double)), (42);
|
|
select * from test_nan where val > 0
|
|
---- RESULTS
|
|
42
|
|
====
|
|
---- QUERY
|
|
# IMPALA-6527: NaN values lead to incorrect filtering
|
|
# Test if '<' predicate produces expected results as well.
|
|
select * from test_nan where val < 100
|
|
---- RESULTS
|
|
42
|
|
====
|
|
---- QUERY
|
|
# IMPALA-6527: NaN values lead to incorrect filtering
|
|
# Test if valid statistics are written. The column chunk should be filtered out by
|
|
# the min filter.
|
|
select * from test_nan where val < 10
|
|
---- RESULTS
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, NumRowGroups): 1
|
|
aggregation(SUM, NumStatsFilteredRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# IMPALA-6527: NaN values lead to incorrect filtering
|
|
# Test predicate that is true for NaN.
|
|
select * from test_nan where not val >= 0
|
|
---- RESULTS
|
|
NaN
|
|
====
|
|
---- QUERY
|
|
# IMPALA-6527: NaN values lead to incorrect filtering
|
|
# Test predicate that is true for NaN.
|
|
select * from test_nan where val != 0
|
|
---- RESULTS
|
|
NaN
|
|
42
|
|
====
|
|
---- QUERY
|
|
# Statistics filtering must not filter out row groups if predicate can be true for NaN.
|
|
create table test_nan_true_predicate(val double) stored as parquet;
|
|
insert into test_nan_true_predicate values (10), (20), (cast('NaN' as double));
|
|
select * from test_nan_true_predicate where not val >= 0
|
|
---- RESULTS
|
|
NaN
|
|
====
|
|
---- QUERY
|
|
# NaN is the last element, predicate is true for NaN and value.
|
|
select * from test_nan_true_predicate where not val >= 20
|
|
---- RESULTS
|
|
10
|
|
NaN
|
|
====
|
|
---- QUERY
|
|
# NaN is the last element, predicate is true for NaN and value.
|
|
select * from test_nan_true_predicate where val != 10
|
|
---- RESULTS
|
|
20
|
|
NaN
|
|
====
|
|
---- QUERY
|
|
# Test the case when NaN is inserted between two values.
|
|
# Test predicate true for NaN and false for the values.
|
|
create table test_nan_in_the_middle(val double) stored as parquet;
|
|
insert into test_nan_in_the_middle values (10), (cast('NaN' as double)), (20);
|
|
select * from test_nan_in_the_middle where not val >= 0
|
|
---- RESULTS
|
|
NaN
|
|
====
|
|
---- QUERY
|
|
# NaN in the middle, predicate true for NaN and value.
|
|
select * from test_nan_in_the_middle where not val >= 20
|
|
---- RESULTS
|
|
10
|
|
NaN
|
|
====
|
|
---- QUERY
|
|
# NaN in the middle, '!=' should return NaN and value.
|
|
select * from test_nan_in_the_middle where val != 10
|
|
---- RESULTS
|
|
NaN
|
|
20
|
|
====
|
|
---- QUERY
|
|
# Test the case when there are only NaNs in the column chunk.
|
|
# Test predicate true for NaN
|
|
create table test_nan_only(val double) stored as parquet;
|
|
insert into test_nan_only values (cast('NaN' as double)), (cast('NaN' as double)),
|
|
(cast('NaN' as double));
|
|
select * from test_nan_only where not val >= 0
|
|
---- RESULTS
|
|
NaN
|
|
NaN
|
|
NaN
|
|
====
|
|
---- QUERY
|
|
# There are only NaN values, predicate is false for NaN
|
|
select * from test_nan_only where val >= 20
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Test the case when a number is following multiple NaNs.
|
|
# Test predicate true for NaN, false for the inserted number
|
|
create table test_multiple_nans(val double) stored as parquet;
|
|
insert into test_multiple_nans values (cast('NaN' as double)), (cast('NaN' as double)),
|
|
(cast('NaN' as double)), (20);
|
|
select * from test_multiple_nans where not val >= 0
|
|
---- RESULTS
|
|
NaN
|
|
NaN
|
|
NaN
|
|
====
|
|
---- QUERY
|
|
# Multiple NaNs followed by a number, predicate is false for NaN and true for the number
|
|
select * from test_multiple_nans where val >= 20
|
|
---- RESULTS
|
|
20
|
|
====
|
|
---- QUERY
|
|
# Multiple NaNs followed by a number, predicate is true for NaN and true for the number
|
|
select * from test_multiple_nans where not val > 20
|
|
---- RESULTS
|
|
NaN
|
|
NaN
|
|
NaN
|
|
20
|
|
====
|
|
---- QUERY
|
|
# Multiple NaNs followed by a number, predicate is false for NaN and false for the number
|
|
select * from test_multiple_nans where val > 20
|
|
---- RESULTS
|
|
====
|