mirror of
https://github.com/apache/impala.git
synced 2026-01-02 03:00:32 -05:00
A compute stats command computes the table and column stats for a given table and persists them in the metastore. The table stats consist of the per-partition and per-table row count. The column stats are computed on a per-table basis and consist of the number of distinct values and the number of NULLs per column. This patch introduces a new 'child query' concept that compute stats utilizes. Child queries are cancelled if the parent query is cancelled. A compute stats stmt is executed by the following query hierarchy: parent: compute stats query (DDL) - child: compute table stats query (QUERY) - child: compute column stats query (QUERY) The new child query concept is necessary to decouple child query fetches from parent query fetches, i.e., we could not execute a child query as part of the original compute stats query, because then a client could fetch the results we need for updating the Metastore statistics. The reason why our existing CTAS works without this decoupling is that its insert 'child query' is not fetchable. Change-Id: I560533e3cb09bcbbdb3eea7fcf0b460bc6b36dcd Reviewed-on: http://gerrit.ent.cloudera.com:8080/873 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: jenkins
368 lines
7.5 KiB
Plaintext
368 lines
7.5 KiB
Plaintext
====
|
|
---- QUERY
|
|
# use functional
|
|
use functional
|
|
====
|
|
---- QUERY
|
|
# Simple pattern literal
|
|
show tables "alltypes"
|
|
---- RESULTS
|
|
'alltypes'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Pattern with wildcard
|
|
show tables "all*ypes"
|
|
---- RESULTS
|
|
'alltypes'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Empty string matches nothing
|
|
show tables ""
|
|
---- RESULTS
|
|
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Check that regex metacharacters are being escaped
|
|
show tables '+alltypes'
|
|
---- RESULTS
|
|
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Fully qualified names will not match
|
|
show tables "functional.alltypes"
|
|
---- RESULTS
|
|
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Database names are case insensitive.
|
|
USE FUNCTIONAL
|
|
---- RESULTS
|
|
|
|
====
|
|
---- QUERY
|
|
# Different database
|
|
# We have no SHOW TABLES with no pattern test since the set of tables is
|
|
# changing often, and may be different depending on whether benchmark data is
|
|
# loaded, for example.
|
|
show tables '*'
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'alltypes'
|
|
'alltypesagg'
|
|
'alltypesaggmultifiles'
|
|
'alltypesaggmultifilesnopart'
|
|
'alltypesaggnonulls'
|
|
'alltypeserror'
|
|
'alltypeserrornonulls'
|
|
'alltypesinsert'
|
|
'alltypesmixedformat'
|
|
'alltypesnopart'
|
|
'alltypesnopart_insert'
|
|
'alltypessmall'
|
|
'alltypestiny'
|
|
'array_table'
|
|
'dimtbl'
|
|
'emptytable'
|
|
'escapechartesttable'
|
|
'greptiny'
|
|
'insert_overwrite_nopart'
|
|
'insert_overwrite_partitioned'
|
|
'jointbl'
|
|
'liketbl'
|
|
'map_table'
|
|
'nullescapedtable'
|
|
'nullinsert'
|
|
'nullinsert_alt'
|
|
'nulltable'
|
|
'overflow'
|
|
'rankingssmall'
|
|
'stringpartitionkey'
|
|
'tblwithraggedcolumns'
|
|
'testtbl'
|
|
'text_comma_backslash_newline'
|
|
'text_dollar_hash_pipe'
|
|
'tinyinttable'
|
|
'tinytable'
|
|
'uservisitssmall'
|
|
'zipcode_incomes'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Choice amongst patterns
|
|
show tables "alltypes|stringpartitionkey|testtbl"
|
|
---- RESULTS
|
|
'alltypes'
|
|
'stringpartitionkey'
|
|
'testtbl'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Show tables in
|
|
show tables in functional
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'alltypes'
|
|
'alltypesagg'
|
|
'alltypesaggmultifiles'
|
|
'alltypesaggmultifilesnopart'
|
|
'alltypesaggnonulls'
|
|
'alltypeserror'
|
|
'alltypeserrornonulls'
|
|
'alltypesinsert'
|
|
'alltypesmixedformat'
|
|
'alltypesnopart'
|
|
'alltypesnopart_insert'
|
|
'alltypessmall'
|
|
'alltypestiny'
|
|
'array_table'
|
|
'dimtbl'
|
|
'emptytable'
|
|
'escapechartesttable'
|
|
'greptiny'
|
|
'insert_overwrite_nopart'
|
|
'insert_overwrite_partitioned'
|
|
'jointbl'
|
|
'liketbl'
|
|
'map_table'
|
|
'nullescapedtable'
|
|
'nullinsert'
|
|
'nullinsert_alt'
|
|
'nulltable'
|
|
'overflow'
|
|
'rankingssmall'
|
|
'stringpartitionkey'
|
|
'tblwithraggedcolumns'
|
|
'testtbl'
|
|
'text_comma_backslash_newline'
|
|
'text_dollar_hash_pipe'
|
|
'tinyinttable'
|
|
'tinytable'
|
|
'uservisitssmall'
|
|
'zipcode_incomes'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Show tables in with a pattern
|
|
show tables in functional like 'alltypesagg*'
|
|
---- RESULTS
|
|
'alltypesagg'
|
|
'alltypesaggmultifiles'
|
|
'alltypesaggmultifilesnopart'
|
|
'alltypesaggnonulls'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Coverage of syntax variations.
|
|
show tables in functional 'alltypesagg'
|
|
---- RESULTS
|
|
'alltypesagg'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show tables in functional like 'alltypesagg'
|
|
---- RESULTS
|
|
'alltypesagg'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show tables 'alltypesagg'
|
|
---- RESULTS
|
|
'alltypesagg'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show tables like 'alltypesagg'
|
|
---- RESULTS
|
|
'alltypesagg'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Show databases
|
|
show databases like 'tpcds'
|
|
---- RESULTS
|
|
'tpcds'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show databases like 'functional'
|
|
---- RESULTS
|
|
'functional'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show databases like 'functional'
|
|
---- RESULTS
|
|
'functional'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Stats on a partitioned Hdfs table stored as text
|
|
show table stats alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, FORMAT
|
|
---- RESULTS
|
|
2009,1,310,1,'19.95KB','TEXT'
|
|
2009,2,280,1,'18.12KB','TEXT'
|
|
2009,3,310,1,'20.06KB','TEXT'
|
|
2009,4,300,1,'19.61KB','TEXT'
|
|
2009,5,310,1,'20.36KB','TEXT'
|
|
2009,6,300,1,'19.71KB','TEXT'
|
|
2009,7,310,1,'20.36KB','TEXT'
|
|
2009,8,310,1,'20.36KB','TEXT'
|
|
2009,9,300,1,'19.71KB','TEXT'
|
|
2009,10,310,1,'20.36KB','TEXT'
|
|
2009,11,300,1,'19.71KB','TEXT'
|
|
2009,12,310,1,'20.36KB','TEXT'
|
|
2010,1,310,1,'20.36KB','TEXT'
|
|
2010,2,280,1,'18.39KB','TEXT'
|
|
2010,3,310,1,'20.36KB','TEXT'
|
|
2010,4,300,1,'19.71KB','TEXT'
|
|
2010,5,310,1,'20.36KB','TEXT'
|
|
2010,6,300,1,'19.71KB','TEXT'
|
|
2010,7,310,1,'20.36KB','TEXT'
|
|
2010,8,310,1,'20.36KB','TEXT'
|
|
2010,9,300,1,'19.71KB','TEXT'
|
|
2010,10,310,1,'20.36KB','TEXT'
|
|
2010,11,300,1,'19.71KB','TEXT'
|
|
2010,12,310,1,'20.36KB','TEXT'
|
|
Total,,7300,24,'478.45KB',''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Stats on an unpartitioned Hdfs table stored as text
|
|
show table stats alltypesaggmultifilesnopart
|
|
---- LABELS
|
|
#ROWS, #FILES, SIZE, FORMAT
|
|
---- RESULTS
|
|
10000,4,'734.18KB','TEXT'
|
|
---- TYPES
|
|
BIGINT, BIGINT, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Stats on an Hdfs table with mixed partition formats
|
|
show table stats alltypesmixedformat
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, FORMAT
|
|
---- RESULTS
|
|
2009,1,310,1,'19.59KB','TEXT'
|
|
2009,2,0,1,'21.35KB','SEQUENCE_FILE'
|
|
2009,3,0,1,'17.42KB','RC_FILE'
|
|
Total,,310,3,'58.36KB',''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Stats on a table that has no statistics
|
|
show table stats functional_parquet.alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, FORMAT
|
|
---- RESULTS
|
|
2009,1,-1,1,regex:.+KB,'PARQUET'
|
|
2009,2,-1,1,regex:.+KB,'PARQUET'
|
|
2009,3,-1,1,regex:.+KB,'PARQUET'
|
|
2009,4,-1,1,regex:.+KB,'PARQUET'
|
|
2009,5,-1,1,regex:.+KB,'PARQUET'
|
|
2009,6,-1,1,regex:.+KB,'PARQUET'
|
|
2009,7,-1,1,regex:.+KB,'PARQUET'
|
|
2009,8,-1,1,regex:.+KB,'PARQUET'
|
|
2009,9,-1,1,regex:.+KB,'PARQUET'
|
|
2009,10,-1,1,regex:.+KB,'PARQUET'
|
|
2009,11,-1,1,regex:.+KB,'PARQUET'
|
|
2009,12,-1,1,regex:.+KB,'PARQUET'
|
|
2010,1,-1,1,regex:.+KB,'PARQUET'
|
|
2010,2,-1,1,regex:.+KB,'PARQUET'
|
|
2010,3,-1,1,regex:.+KB,'PARQUET'
|
|
2010,4,-1,1,regex:.+KB,'PARQUET'
|
|
2010,5,-1,1,regex:.+KB,'PARQUET'
|
|
2010,6,-1,1,regex:.+KB,'PARQUET'
|
|
2010,7,-1,1,regex:.+KB,'PARQUET'
|
|
2010,8,-1,1,regex:.+KB,'PARQUET'
|
|
2010,9,-1,1,regex:.+KB,'PARQUET'
|
|
2010,10,-1,1,regex:.+KB,'PARQUET'
|
|
2010,11,-1,1,regex:.+KB,'PARQUET'
|
|
2010,12,-1,1,regex:.+KB,'PARQUET'
|
|
Total,,-1,24,regex:.+KB,''
|
|
---- TYPES
|
|
INT, INT, BIGINT, BIGINT, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Stats on an HBase table
|
|
show table stats functional_hbase.alltypesagg
|
|
---- LABELS
|
|
REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
regex:.+,'',4,'1.18KB'
|
|
regex:.+,'1',4044,'1.09MB'
|
|
regex:.+,'3',4021,'1.09MB'
|
|
regex:.+,'5',4041,'1.09MB'
|
|
regex:.+,'7',4039,'1.09MB'
|
|
regex:.+,'9',2014,'561.71KB'
|
|
'Total','',18163,'4.93MB'
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, STRING
|
|
====
|
|
---- QUERY
|
|
# Column stats on an HdfsTable
|
|
show column stats alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',9299,0,4,4
|
|
'bool_col','BOOLEAN',2,0,1,1
|
|
'tinyint_col','TINYINT',11,0,1,1
|
|
'smallint_col','SMALLINT',11,0,2,2
|
|
'int_col','INT',11,0,4,4
|
|
'bigint_col','BIGINT',7,0,8,8
|
|
'float_col','FLOAT',15,0,4,4
|
|
'double_col','DOUBLE',11,0,8,8
|
|
'date_string_col','STRING',822,0,8,8
|
|
'string_col','STRING',9,0,1,1
|
|
'timestamp_col','TIMESTAMP',7171,0,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Column stats on an HBaseTable
|
|
show column stats functional_hbase.alltypessmall
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',63,0,4,4
|
|
'bigint_col','BIGINT',7,0,8,8
|
|
'bool_col','BOOLEAN',2,0,1,1
|
|
'date_string_col','STRING',12,0,8,8
|
|
'double_col','DOUBLE',11,0,8,8
|
|
'float_col','FLOAT',15,0,4,4
|
|
'int_col','INT',11,0,4,4
|
|
'month','INT',5,0,4,4
|
|
'smallint_col','SMALLINT',11,0,2,2
|
|
'string_col','STRING',9,0,1,1
|
|
'timestamp_col','TIMESTAMP',107,0,16,16
|
|
'tinyint_col','TINYINT',11,0,1,1
|
|
'year','INT',1,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE
|
|
====
|