Files
impala/testdata/workloads/functional-query/queries/QueryTest/compute-stats-decimal.test
Martin Grund cee1e84c1e IMPALA-1587: Extending caching directives for multiple replicas
This patch adds the possibility to specify the number of replicas that
should be cached in main memory. This can be useful in high-QPS
scenarios, as the majority of the load no longer falls on a single
cached replica but is spread across a set of cached replicas. While the
cache replication factor can be larger than the block replication factor
on disk, the difference will be ignored by HDFS until more replicas
become available.

This extends the current syntax for specifying the cache pool in the
following way:

   cached in 'poolName'

is extended with the optional replication factor

   cached in 'poolName' with replication = XX

By default, the cache replication factor is set to 1. As this value is
not yet configurable in HDFS, it is defined as a constant in the JniCatalog
Thrift specification. If a partitioned table is cached, all its child
partitions inherit this cache replication factor. If child partitions
have a custom cache replication factor, changing the cache replication
factor on the partitioned table afterwards will overwrite this custom
value. If a new partition is added to the table, it will again inherit
the cache replication factor of the parent independent of the cache pool
that is used to cache the partition.

To review changes to and the status of the replication factor for tables
and partitions, the replication factor is included in the output of the
"show partitions" command.

Change-Id: I2aee63258d6da14fb5ce68574c6b070cf948fb4d
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/5533
Tested-by: jenkins
Reviewed-by: Martin Grund <mgrund@cloudera.com>
2015-01-26 20:30:59 -08:00

66 lines
1.9 KiB
Plaintext

====
---- QUERY
# test compute stats on a partitioned decimal text table
# Setup: clone the schema of functional.decimal_tbl into compute_stats_db, then
# copy all of its rows. partition(d6) names no value, so d6 is taken from the
# selected rows.
create table compute_stats_db.decimal_tbl like functional.decimal_tbl;
insert into compute_stats_db.decimal_tbl partition(d6)
select * from functional.decimal_tbl;
====
---- QUERY
# Compute stats on the decimal table populated by the previous test case.
# The expected status message reports 1 partition and 5 columns.
compute stats compute_stats_db.decimal_tbl
---- RESULTS
'Updated 1 partition(s) and 5 column(s).'
====
---- QUERY
# Table-level stats after compute stats: the expected output has one row for
# partition d6=1 (5 rows, 1 file, 375B, TEXT, not cached, no incremental stats)
# followed by a Total row.
show table stats compute_stats_db.decimal_tbl
---- LABELS
d6, #Rows, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental Stats
---- RESULTS
'1',5,1,'375B','NOT CACHED','NOT CACHED','TEXT','false'
'Total',5,1,'375B','0B','','',''
---- TYPES
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Column-level stats after compute stats. Expected sizes track the decimal
# precision: 4 bytes for DECIMAL(9,0), 8 for precision 10, 16 for precision 20
# and 38. #NULLS is -1 for d1-d5 and 0 for the partition column d6.
# NOTE(review): -1 presumably means the null count was not computed - confirm.
show column stats compute_stats_db.decimal_tbl
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'd1','DECIMAL(9,0)',4,-1,4,4
'd2','DECIMAL(10,0)',3,-1,8,8
'd3','DECIMAL(20,10)',5,-1,16,16
'd4','DECIMAL(38,38)',1,-1,16,16
'd5','DECIMAL(10,5)',5,-1,8,8
'd6','DECIMAL(9,0)',1,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====
---- QUERY
# test compute stats on a mixed-type parquet table
# Setup: create a parquet table with no partition clause that has one INT and
# one DECIMAL(10,0) column, then insert two rows.
create table compute_stats_db.mixed_types(a int, b decimal(10,0)) stored as parquet;
insert into compute_stats_db.mixed_types values (1, 2), (3, 4);
====
---- QUERY
# Compute stats on the parquet table. The table was created without a
# partition clause, yet the expected message still reports 1 partition,
# along with its 2 columns.
compute stats compute_stats_db.mixed_types
---- RESULTS
'Updated 1 partition(s) and 2 column(s).'
====
---- QUERY
# Table-level stats for the parquet table: a single row (no partition column
# and no Total row) with 2 rows in 1 PARQUET file, not cached. Size is matched
# by a regex instead of an exact value.
# NOTE(review): presumably because the parquet file size is not stable - confirm.
show table stats compute_stats_db.mixed_types
---- LABELS
#Rows, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental Stats
---- RESULTS
2,1,regex:.+B,'NOT CACHED','NOT CACHED','PARQUET','false'
---- TYPES
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
# Column-level stats for the parquet table: both columns are expected to have
# 2 distinct values, with sizes of 4 bytes for INT and 8 bytes for DECIMAL(10,0).
show column stats compute_stats_db.mixed_types
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'a','INT',2,-1,4,4
'b','DECIMAL(10,0)',2,-1,8,8
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
====