mirror of
https://github.com/apache/impala.git
synced 2026-01-09 06:05:09 -05:00
This patch adds the ability to specify the number of replicas that should be cached in main memory. This can be useful in high-QPS scenarios because the load is no longer concentrated on a single cached replica but is spread across a set of cached replicas. While the cache replication factor can be larger than the block replication factor on disk, the difference will be ignored by HDFS until more replicas become available. This extends the current syntax for specifying the cache pool in the following way: the clause cached in 'poolName' can now be followed by an optional replication factor, i.e. cached in 'poolName' with replication = XX. By default, the cache replication factor is set to 1. As this value is not yet configurable in HDFS, it is defined as a constant in the JniCatalog thrift specification. If a partitioned table is cached, all its child partitions inherit this cache replication factor. If child partitions have a custom cache replication factor, changing the cache replication factor on the partitioned table afterwards will overwrite this custom value. If a new partition is added to the table, it will again inherit the cache replication factor of the parent, independent of the cache pool that is used to cache the partition. To review changes to, and the status of, the replication factor for tables and partitions, the replication factor is included in the output of the "show partitions" command. Change-Id: I2aee63258d6da14fb5ce68574c6b070cf948fb4d Reviewed-on: http://gerrit.sjc.cloudera.com:8080/5533 Tested-by: jenkins Reviewed-by: Martin Grund <mgrund@cloudera.com>
66 lines
1.9 KiB
Plaintext
66 lines
1.9 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Test compute stats on a text-format decimal table partitioned by d6 (copied from functional.decimal_tbl).
|
|
create table compute_stats_db.decimal_tbl like functional.decimal_tbl;
|
|
insert into compute_stats_db.decimal_tbl partition(d6)
|
|
select * from functional.decimal_tbl;
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.decimal_tbl
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 5 column(s).'
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.decimal_tbl
|
|
---- LABELS
|
|
d6, #Rows, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental Stats
|
|
---- RESULTS
|
|
'1',5,1,'375B','NOT CACHED','NOT CACHED','TEXT','false'
|
|
'Total',5,1,'375B','0B','','',''
|
|
---- TYPES
|
|
STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.decimal_tbl
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'd1','DECIMAL(9,0)',4,-1,4,4
|
|
'd2','DECIMAL(10,0)',3,-1,8,8
|
|
'd3','DECIMAL(20,10)',5,-1,16,16
|
|
'd4','DECIMAL(38,38)',1,-1,16,16
|
|
'd5','DECIMAL(10,5)',5,-1,8,8
|
|
'd6','DECIMAL(9,0)',1,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Test compute stats on an unpartitioned parquet table that mixes INT and DECIMAL columns.
|
|
create table compute_stats_db.mixed_types(a int, b decimal(10,0)) stored as parquet;
|
|
insert into compute_stats_db.mixed_types values (1, 2), (3, 4);
|
|
====
|
|
---- QUERY
|
|
compute stats compute_stats_db.mixed_types
|
|
---- RESULTS
|
|
'Updated 1 partition(s) and 2 column(s).'
|
|
====
|
|
---- QUERY
|
|
show table stats compute_stats_db.mixed_types
|
|
---- LABELS
|
|
#Rows, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental Stats
|
|
---- RESULTS
|
|
2,1,regex:.+B,'NOT CACHED','NOT CACHED','PARQUET','false'
|
|
---- TYPES
|
|
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
show column stats compute_stats_db.mixed_types
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'a','INT',2,-1,4,4
|
|
'b','DECIMAL(10,0)',2,-1,8,8
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
|
|
====
|