mirror of
https://github.com/apache/impala.git
synced 2026-01-29 12:00:20 -05:00
It was disabled for performance reasons (IMPALA-1003) and this patch re-enables it since a lot of codegen improvements have happened since then. This patch switches the aggregation to use the CASE conditional instead of IF since the former has proper codegen support (IMPALA-7655). Tests: ===== - Updated the affected tests to include the null counts. - Added unit tests that verify IS [NOT] NULL predicates' cardinality estimation. Perf note: ========= I reran the compute stats child query with null counts included on the store_sales table from 1000 SF (1TB) tpcds dataset. The table had 22 non-partitioned columns (on which null counts were computed) and ~2.8B rows. This experiment showed around 7-8% perf drop compared to the same child query without null counts for these columns. Change-Id: Ic68f8b4c3756eb1980ce299a602a7d56db1e507a Reviewed-on: http://gerrit.cloudera.org:8080/11565 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
133 lines
3.8 KiB
Plaintext
133 lines
3.8 KiB
Plaintext
====
|
|
---- QUERY
|
|
# test computing stats on an HBase table
|
|
create table alltypessmall_hbase like functional_hbase.alltypessmall
|
|
====
|
|
---- QUERY
|
|
compute stats alltypessmall_hbase
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 13 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show table stats alltypessmall_hbase
|
|
---- LABELS
|
|
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
regex:.+,'',regex:.+,regex:.+B
|
|
regex:.+,'1',regex:.+,regex:.+B
|
|
regex:.+,'3',regex:.+,regex:.+B
|
|
regex:.+,'5',regex:.+,regex:.+B
|
|
regex:.+,'7',regex:.+,regex:.+B
|
|
regex:.+,'9',regex:.+,regex:.+B
|
|
'Total','',regex:.+,regex:.+B
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats alltypessmall_hbase
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',99,0,4,4
|
|
'bigint_col','BIGINT',10,0,8,8
|
|
'bool_col','BOOLEAN',2,0,1,1
|
|
'date_string_col','STRING',12,0,8,8
|
|
'double_col','DOUBLE',10,0,8,8
|
|
'float_col','FLOAT',10,0,4,4
|
|
'int_col','INT',10,0,4,4
|
|
'month','INT',4,0,4,4
|
|
'smallint_col','SMALLINT',10,0,2,2
|
|
'string_col','STRING',10,0,1,1
|
|
'timestamp_col','TIMESTAMP',100,0,16,16
|
|
'tinyint_col','TINYINT',10,0,1,1
|
|
'year','INT',1,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# test computing stats on an binary HBase table
|
|
create table alltypessmall_hbase_bin like functional_hbase.alltypessmallbinary
|
|
====
|
|
---- QUERY
|
|
compute stats alltypessmall_hbase_bin
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 13 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY: VERIFY_IS_EQUAL
|
|
show table stats alltypessmall_hbase_bin
|
|
---- LABELS
|
|
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
|
|
---- RESULTS
|
|
regex:.+,'',regex:.+,regex:.+
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats alltypessmall_hbase_bin
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',99,0,4,4
|
|
'bigint_col','BIGINT',10,0,8,8
|
|
'bool_col','BOOLEAN',2,0,1,1
|
|
'date_string_col','STRING',12,0,8,8
|
|
'double_col','DOUBLE',10,0,8,8
|
|
'float_col','FLOAT',10,0,4,4
|
|
'int_col','INT',10,0,4,4
|
|
'month','INT',4,0,4,4
|
|
'smallint_col','SMALLINT',10,0,2,2
|
|
'string_col','STRING',10,0,1,1
|
|
'timestamp_col','TIMESTAMP',100,0,16,16
|
|
'tinyint_col','TINYINT',10,0,1,1
|
|
'year','INT',1,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# IMP-1227: Test computing stats on an HBase table that has a
|
|
# complex-typed column that Impala does not yet support.
|
|
create table allcomplextypes_hbase like functional_hbase.allcomplextypes
|
|
====
|
|
---- QUERY
|
|
compute stats allcomplextypes_hbase
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 3 column(s).'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY: VERIFY_IS_EQUAL
|
|
show table stats allcomplextypes_hbase
|
|
---- LABELS
|
|
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
|
|
---- RESULTS
|
|
regex:.+,'',regex:.+,regex:.+
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats allcomplextypes_hbase
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',0,0,4,4
|
|
'array_array_col','ARRAY<ARRAY<INT>>',-1,-1,-1,-1
|
|
'array_map_col','MAP<STRING,ARRAY<INT>>',-1,-1,-1,-1
|
|
'complex_nested_struct_col','STRUCT<f1:INT,f2:ARRAY<STRUCT<f11:BIGINT,f12:MAP<STRING,STRUCT<f21:BIGINT>>>>>',-1,-1,-1,-1
|
|
'complex_struct_col','STRUCT<f1:INT,f2:ARRAY<INT>,f3:MAP<STRING,INT>>',-1,-1,-1,-1
|
|
'int_array_col','ARRAY<INT>',-1,-1,-1,-1
|
|
'int_map_col','MAP<STRING,INT>',-1,-1,-1,-1
|
|
'int_struct_col','STRUCT<f1:INT,f2:INT>',-1,-1,-1,-1
|
|
'map_array_col','ARRAY<MAP<STRING,INT>>',-1,-1,-1,-1
|
|
'map_map_col','MAP<STRING,MAP<STRING,INT>>',-1,-1,-1,-1
|
|
'month','INT',0,0,4,4
|
|
'nested_struct_col','STRUCT<f1:INT,f2:STRUCT<f11:BIGINT,f12:STRUCT<f21:BIGINT>>>',-1,-1,-1,-1
|
|
'struct_array_col','ARRAY<STRUCT<f1:BIGINT,f2:STRING>>',-1,-1,-1,-1
|
|
'struct_map_col','MAP<STRING,STRUCT<f1:BIGINT,f2:STRING>>',-1,-1,-1,-1
|
|
'year','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
|
|
==== |