mirror of
https://github.com/apache/impala.git
synced 2025-12-31 15:00:10 -05:00
With IMPALA-1033 we disabled the counting of the number of NULLs in each column, and that gave a 2x speed-up in the computation. But erroneously the value 0 was being placed in the number of NULLs, instead of the correct -1 that indicates 'unknown'. Change-Id: Ib882eb2a87e7e2469f606081cb2881461b441a45 Reviewed-on: http://gerrit.ent.cloudera.com:8080/3377 Reviewed-by: Ippokratis Pandis <ipandis@cloudera.com> Tested-by: jenkins Reviewed-on: http://gerrit.ent.cloudera.com:8080/3378
66 lines
1.8 KiB
Plaintext
66 lines
1.8 KiB
Plaintext
====
|
|
---- QUERY
|
|
# test compute stats on a partitioned decimal text table
# Setup: clone the schema of functional.decimal_tbl into compute_stats_db, then copy
# every row across with an insert that leaves the d6 partition value to be taken
# from the selected data.
|
|
create table compute_stats_db.decimal_tbl like functional.decimal_tbl;
|
|
insert into compute_stats_db.decimal_tbl partition(d6)
|
|
select * from functional.decimal_tbl;
|
|
====
|
|
---- QUERY
|
|
# Gather table and column stats for compute_stats_db.decimal_tbl and check the
# summary message.
# NOTE(review): the message counts 5 columns while the table exposes six (d1-d6);
# presumably the partition column d6 is not counted -- confirm.
compute stats compute_stats_db.decimal_tbl
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 5 column(s).'
|
|
====
|
|
---- QUERY
|
|
# Expect one row for the single partition (d6 = 1) followed by a 'Total' summary
# row; both report 5 rows in 1 file of size 375B.
show table stats compute_stats_db.decimal_tbl
|
|
---- LABELS
|
|
d6, #Rows, #Files, Size, Bytes Cached, Format
|
|
---- RESULTS
|
|
1,5,1,'375B','NOT CACHED','TEXT'
|
|
Total,5,1,'375B','0B',''
|
|
---- TYPES
|
|
DECIMAL, BIGINT, BIGINT, STRING, STRING, STRING
|
|
|
|
====
|
|
---- QUERY
|
|
# Expect one stats row per column, with #NULLS of -1 for d1-d5 and 0 for d6.
# NOTE(review): -1 is taken to mean "unknown" (NULL counting is not performed);
# d6 is the partition column, and its 0 is presumably an exact count that is
# intentional rather than a leftover of the old "0 instead of -1" bug -- confirm.
show column stats compute_stats_db.decimal_tbl
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'd1','DECIMAL(9,0)',4,-1,4,4
|
|
'd2','DECIMAL(10,0)',3,-1,8,8
|
|
'd3','DECIMAL(20,10)',5,-1,16,16
|
|
'd4','DECIMAL(38,38)',1,-1,16,16
|
|
'd5','DECIMAL(10,5)',5,-1,8,8
|
|
'd6','DECIMAL(9,0)',1,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# test compute stats on a mixed-type parquet table
# Setup: a parquet table declared without partition columns, holding an INT column
# and a DECIMAL(10,0) column, loaded with two rows.
|
|
create table compute_stats_db.mixed_types(a int, b decimal(10,0)) stored as parquet;
|
|
insert into compute_stats_db.mixed_types values (1, 2), (3, 4);
|
|
====
|
|
---- QUERY
|
|
# Gather stats for compute_stats_db.mixed_types; both columns (a and b) should be
# reported as updated.
# NOTE(review): the message says 1 partition(s) although the table declares no
# partition columns; presumably the whole table counts as one partition -- confirm.
compute stats compute_stats_db.mixed_types
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 2 column(s).'
|
|
====
|
|
---- QUERY
|
|
# Expect a single row: 2 rows in 1 parquet file.
# The Size column is matched with a regex (some text ending in 'B') rather than a
# literal byte count; presumably the parquet file size is not stable -- confirm.
show table stats compute_stats_db.mixed_types
|
|
---- LABELS
|
|
#Rows, #Files, Size, Bytes Cached, Format
|
|
---- RESULTS
|
|
2,1,regex:.+B,'NOT CACHED','PARQUET'
|
|
---- TYPES
|
|
BIGINT, BIGINT, STRING, STRING, STRING
|
|
|
|
====
|
|
---- QUERY
|
|
# Expect 2 distinct values for each of a and b, and #NULLS of -1 for both.
# NOTE(review): -1 is taken to mean "unknown" (NULL counting is not performed) --
# confirm.
show column stats compute_stats_db.mixed_types
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'a','INT',2,-1,4,4
|
|
'b','DECIMAL(10,0)',2,-1,8,8
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|