mirror of
https://github.com/apache/impala.git
synced 2026-01-29 03:00:27 -05:00
Null-count computation in COMPUTE STATS was disabled for performance reasons (IMPALA-1003), and this patch re-enables it since a lot of codegen improvements have happened since then. This patch switches the aggregation to use the CASE conditional instead of IF, since the former has proper codegen support (IMPALA-7655).

Tests:
=====
- Updated the affected tests to include the null counts.
- Added unit tests that verify IS [NOT] NULL predicates' cardinality estimation.

Perf note:
=========
I reran the compute stats child query with null counts included on the store_sales table from the 1000 SF (1TB) tpcds dataset. The table had 22 non-partitioned columns (on which null counts were computed) and ~2.8B rows. This experiment showed around a 7-8% perf drop compared to the same child query without null counts for these columns.

Change-Id: Ic68f8b4c3756eb1980ce299a602a7d56db1e507a
Reviewed-on: http://gerrit.cloudera.org:8080/11565
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
49 lines
1.2 KiB
Plaintext
49 lines
1.2 KiB
Plaintext
====
---- QUERY
# Clone the definition of functional_hbase.alltypessmall into the current
# database so the stats statements below operate on a fresh table.
create table alltypessmall_hbase like functional_hbase.alltypessmall
---- RESULTS
'Table has been created.'
====
---- QUERY
# Compute incremental stats on the cloned table. The expected status message
# reports one partition and 13 columns, matching the 13 rows checked by the
# 'show column stats' case in this file.
compute incremental stats alltypessmall_hbase
---- RESULTS
'Updated 1 partition(s) and 13 column(s).'
---- TYPES
STRING
====
---- QUERY
# Table stats are listed per region (see LABELS) followed by a 'Total' row.
# Region location, estimated row count and size are matched by regex rather
# than by exact value; only the start row keys are pinned.
show table stats alltypessmall_hbase
---- LABELS
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
---- RESULTS: VERIFY_IS_EQUAL
regex:.+,'',regex:.+,regex:.+B
regex:.+,'1',regex:.+,regex:.+B
regex:.+,'3',regex:.+,regex:.+B
regex:.+,'5',regex:.+,regex:.+B
regex:.+,'7',regex:.+,regex:.+B
regex:.+,'9',regex:.+,regex:.+B
'Total','',regex:.+,regex:.+B
---- TYPES
STRING, STRING, BIGINT, STRING
====
---- QUERY
# Column stats for the cloned table, one row per column. The #NULLS column
# is expected to be 0 for every column.
show column stats alltypessmall_hbase
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',99,0,4,4
'bigint_col','BIGINT',10,0,8,8
'bool_col','BOOLEAN',2,0,1,1
'date_string_col','STRING',12,0,8,8
'double_col','DOUBLE',10,0,8,8
'float_col','FLOAT',10,0,4,4
'int_col','INT',10,0,4,4
'month','INT',4,0,4,4
'smallint_col','SMALLINT',10,0,2,2
'string_col','STRING',10,0,1,1
'timestamp_col','TIMESTAMP',100,0,16,16
'tinyint_col','TINYINT',10,0,1,1
'year','INT',1,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====