Files
impala/testdata/workloads/functional-query/queries/QueryTest/compute-stats-decimal.test
Bharath Vissapragada 04d027df13 IMPALA-7659: Populate NULL count while computing column stats
It was disabled for performance reasons (IMPALA-1003) and this patch
re-enables it since a lot of codegen improvements have happened since
then.

This patch switches the aggregation to use the CASE conditional instead
of IF since the former has proper codegen support (IMPALA-7655).

Tests:
=====

- Updated the affected tests to include the null counts.
- Added unit tests that verify IS [NOT] NULL predicates' cardinality
  estimation.

Perf note:
=========

I reran the compute stats child query with null counts included on the
store_sales table from 1000 SF (1TB) tpcds dataset. The table had 22
non-partitioned columns (on which null counts were computed) and ~2.8B
rows. This experiment showed around 7-8% perf drop compared to the same
child query without null counts for these columns.

Change-Id: Ic68f8b4c3756eb1980ce299a602a7d56db1e507a
Reviewed-on: http://gerrit.cloudera.org:8080/11565
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2018-12-08 00:21:55 +00:00

65 lines
1.8 KiB
Plaintext

====
---- QUERY
# Fixture for compute stats on a partitioned decimal text table: clone the schema
# of functional.decimal_tbl, then copy every row across. The partition key d6 is
# listed last in the select list so that it feeds the dynamic partition(d6) clause.
create table decimal_tbl
  like functional.decimal_tbl;
insert into decimal_tbl partition (d6)
  select d1, d2, d3, d4, d5, d6
  from functional.decimal_tbl;
====
---- QUERY
# Compute table and column stats for the freshly loaded decimal_tbl. The summary
# is expected to report 1 partition and 5 columns, although the table exposes six
# columns (d1..d6). Presumably the partition key d6 is the one not counted
# (TODO confirm).
compute stats decimal_tbl
---- RESULTS
'Updated 1 partition(s) and 5 column(s).'
====
---- QUERY
# Table-level stats after compute stats: one row for the single partition (d6=1)
# and one 'Total' summary row, each expecting 5 rows in 1 file of 375B. The
# Location value of the partition row is matched by regex, not by a literal string.
show table stats decimal_tbl
---- LABELS
d6, #Rows, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental Stats, Location
---- RESULTS
'1',5,1,'375B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
'Total',5,1,'375B','0B','','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Column-level stats for all six columns, including the partition key d6. Every
# column expects #NULLS = 0, i.e. a populated null count. The expected MAX SIZE
# and AVG SIZE grow with the declared precision: 4 for precision 9, 8 for
# precision 10, and 16 for precisions 20 and 38.
show column stats decimal_tbl
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'd1','DECIMAL(9,0)',4,0,4,4
'd2','DECIMAL(10,0)',3,0,8,8
'd3','DECIMAL(20,10)',5,0,16,16
'd4','DECIMAL(38,38)',1,0,16,16
'd5','DECIMAL(10,5)',5,0,8,8
'd6','DECIMAL(9,0)',1,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Fixture for compute stats on a mixed-type parquet table: an unpartitioned table
# pairing an INT column with a DECIMAL(10,0) column, seeded with two rows that
# contain no NULL values.
create table mixed_types (
  a int,
  b decimal(10,0)
)
stored as parquet;
insert into mixed_types (a, b) values
  (1, 2),
  (3, 4);
====
---- QUERY
# Compute stats on the unpartitioned mixed_types table. Both columns (a and b)
# are expected to be updated, and the summary message still reports 1 partition.
compute stats mixed_types
---- RESULTS
'Updated 1 partition(s) and 2 column(s).'
====
---- QUERY
# Table-level stats for mixed_types: a single result row with no partition-key
# column and no 'Total' row, expecting 2 rows in 1 PARQUET file. The file size
# and the Location are matched by regex rather than pinned to exact values.
show table stats mixed_types
---- LABELS
#Rows, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental Stats, Location
---- RESULTS
2,1,regex:.+B,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
---- TYPES
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Column-level stats for mixed_types: each column expects 2 distinct values and
# 0 NULLs, consistent with the two fully populated rows inserted by the fixture.
# Expected sizes are 4 for the INT column and 8 for the DECIMAL(10,0) column.
show column stats mixed_types
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'a','INT',2,0,4,4
'b','DECIMAL(10,0)',2,0,8,8
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====