Files
impala/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test
Shant Hovsepian 6d87fe090c Improve Hll estimate for small cardinalities.
Based on Google's HyperLogLog++ paper. Uses a bias correcting
interpolation as a sub algorithm for Hll estimates within a specific
range.

Change-Id: If4fe692b4308f6a57aea6167e9bc00db11eaaab9
Reviewed-on: http://gerrit.cloudera.org:8080/415
Tested-by: Internal Jenkins
Reviewed-by: Henry Robinson <henry@cloudera.com>
2015-07-16 19:38:17 +00:00

143 lines
4.2 KiB
Plaintext

====
---- QUERY
# test computing stats on an HBase table
create table compute_stats_db_hbase.alltypessmall_hbase
like alltypessmall;
====
---- QUERY
# Run COMPUTE STATS on the table created above. The single summary row must
# report 1 partition and 13 columns updated.
compute stats compute_stats_db_hbase.alltypessmall_hbase
---- RESULTS
'Updated 1 partition(s) and 13 column(s).'
---- TYPES
STRING
====
---- QUERY
# Expect one row per start rowkey ('', '1', '3', '5', '7', '9') followed by a
# 'Total' row, in exactly this order. Region location and estimated row count
# are matched by regex only; the SIZE pattern additionally requires "KB".
show table stats compute_stats_db_hbase.alltypessmall_hbase
---- LABELS
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
---- RESULTS: VERIFY_IS_EQUAL
regex:.+,'',regex:.+,regex:.+KB
regex:.+,'1',regex:.+,regex:.+KB
regex:.+,'3',regex:.+,regex:.+KB
regex:.+,'5',regex:.+,regex:.+KB
regex:.+,'7',regex:.+,regex:.+KB
regex:.+,'9',regex:.+,regex:.+KB
'Total','',regex:.+,regex:.+KB
---- TYPES
STRING, STRING, BIGINT, STRING
====
---- QUERY
# Column stats after COMPUTE STATS: all 13 columns have a distinct-value count
# and fixed max/avg sizes, while #NULLS is -1 for every column.
# NOTE(review): the distinct-value counts are presumably estimates (e.g. 99 for
# 'id', 101 for 'timestamp_col'), so they may shift if the estimator changes -
# confirm before editing expected values.
show column stats compute_stats_db_hbase.alltypessmall_hbase
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',99,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'bool_col','BOOLEAN',2,-1,1,1
'date_string_col','STRING',12,-1,8,8
'double_col','DOUBLE',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'int_col','INT',10,-1,4,4
'month','INT',4,-1,4,4
'smallint_col','SMALLINT',10,-1,2,2
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',101,-1,16,16
'tinyint_col','TINYINT',10,-1,1,1
'year','INT',1,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# test computing stats on a binary HBase table
create table compute_stats_db_hbase.alltypessmall_hbase_bin
like alltypessmallbinary;
====
---- QUERY
# Run COMPUTE STATS on the binary-encoded table created above. The single
# summary row must report 1 partition and 13 columns updated.
compute stats compute_stats_db_hbase.alltypessmall_hbase_bin
---- RESULTS
'Updated 1 partition(s) and 13 column(s).'
---- TYPES
STRING
====
---- QUERY
# Only the empty start rowkey is pinned; region location, estimated row count
# and size are matched by regex. The verifier annotation belongs on the RESULTS
# header (as in the other table-stats check in this file), not on QUERY.
show table stats compute_stats_db_hbase.alltypessmall_hbase_bin
---- LABELS
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
---- RESULTS: VERIFY_IS_EQUAL
regex:.+,'',regex:.+,regex:.+
---- TYPES
STRING, STRING, BIGINT, STRING
====
---- QUERY
# Column stats for the binary-encoded table. The expected rows are identical to
# those asserted for alltypessmall_hbase earlier in this file: all 13 columns
# have a distinct-value count, and #NULLS is -1 throughout.
show column stats compute_stats_db_hbase.alltypessmall_hbase_bin
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',99,-1,4,4
'bigint_col','BIGINT',10,-1,8,8
'bool_col','BOOLEAN',2,-1,1,1
'date_string_col','STRING',12,-1,8,8
'double_col','DOUBLE',10,-1,8,8
'float_col','FLOAT',10,-1,4,4
'int_col','INT',10,-1,4,4
'month','INT',4,-1,4,4
'smallint_col','SMALLINT',10,-1,2,2
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',101,-1,16,16
'tinyint_col','TINYINT',10,-1,1,1
'year','INT',1,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# IMP-1227: Test computing stats on an HBase table that has a
# complex-typed column that Impala does not yet support.
create table compute_stats_db_hbase.allcomplextypes
like allcomplextypes
====
---- QUERY
# COMPUTE STATS must succeed despite the complex-typed columns and report only
# 3 columns updated. NOTE(review): presumably these are the three INT columns
# (id, month, year), which are the only ones with non-negative stats in the
# column-stats check later in this file - confirm.
compute stats compute_stats_db_hbase.allcomplextypes
---- RESULTS
'Updated 1 partition(s) and 3 column(s).'
---- TYPES
STRING
====
---- QUERY
# Only the empty start rowkey is pinned; region location, estimated row count
# and size are matched by regex. The verifier annotation belongs on the RESULTS
# header (as in the first table-stats check in this file), not on QUERY.
show table stats compute_stats_db_hbase.allcomplextypes
---- LABELS
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
---- RESULTS: VERIFY_IS_EQUAL
regex:.+,'',regex:.+,regex:.+
---- TYPES
STRING, STRING, BIGINT, STRING
====
---- QUERY
# The three INT columns (id, month, year) report 0 distinct values with size 4.
# Every ARRAY/MAP/STRUCT column reports -1 for all four stats.
show column stats compute_stats_db_hbase.allcomplextypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,-1,4,4
'array_array_col','ARRAY<ARRAY<INT>>',-1,-1,-1,-1
'array_map_col','MAP<STRING,ARRAY<INT>>',-1,-1,-1,-1
'complex_nested_struct_col','STRUCT<f1:INT,f2:ARRAY<STRUCT<f11:BIGINT,f12:MAP<STRING,STRUCT<f21:BIGINT>>>>>',-1,-1,-1,-1
'complex_struct_col','STRUCT<f1:INT,f2:ARRAY<INT>,f3:MAP<STRING,INT>>',-1,-1,-1,-1
'int_array_col','ARRAY<INT>',-1,-1,-1,-1
'int_map_col','MAP<STRING,INT>',-1,-1,-1,-1
'int_struct_col','STRUCT<f1:INT,f2:INT>',-1,-1,-1,-1
'map_array_col','ARRAY<MAP<STRING,INT>>',-1,-1,-1,-1
'map_map_col','MAP<STRING,MAP<STRING,INT>>',-1,-1,-1,-1
'month','INT',0,-1,4,4
'nested_struct_col','STRUCT<f1:INT,f2:STRUCT<f11:BIGINT,f12:STRUCT<f21:BIGINT>>>',-1,-1,-1,-1
'struct_array_col','ARRAY<STRUCT<f1:BIGINT,f2:STRING>>',-1,-1,-1,-1
'struct_map_col','MAP<STRING,STRUCT<f1:BIGINT,f2:STRING>>',-1,-1,-1,-1
'year','INT',0,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
compute incremental stats alltypes;
---- RESULTS
'Updated 1 partition(s) and 13 column(s).'
---- TYPES
STRING