mirror of
https://github.com/apache/impala.git
synced 2025-12-31 15:00:10 -05:00
The Impala CatalogService manages the caching and dissemination of cluster-wide metadata. The CatalogService combines the metadata from the Hive Metastore, the NameNode, and potentially additional sources in the future. The CatalogService uses the StateStore to broadcast metadata updates across the cluster. The CatalogService also directly handles executing metadata update requests from impalad servers (DDL requests). It exposes a Thrift interface to allow impalads to directly connect and execute their DDL operations. The CatalogService has two main components - a C++ server that implements StateStore integration, Thrift service implementation, and exporting of the debug webpage/metrics. The other main component is the Java Catalog that manages caching and updating of all the metadata. For each StateStore heartbeat, a delta of all metadata updates is broadcast to the rest of the cluster. Some Notes On the Changes --- * The metadata is all sent as thrift structs. To do this all catalog objects (Tables/Views, Databases, UDFs) have a thrift struct to represent them. These are sent with each statestore delta update. * The existing Catalog class has been separated into two separate sub-classes. An ImpaladCatalog and a CatalogServiceCatalog. See the comments on those classes for more details. What is working: * New CatalogService created * Working with statestore delta updates and latest UDF changes * DDL performed on Node 1 is now visible on all other nodes without a "refresh". * Each DDL operation against the Catalog Service will return the catalog version that contains the change. An impalad will wait for the statestore heartbeat that contains this version before returning from the DDL command. 
* All table types (Hbase, Hdfs, Views) getting their metadata propagated properly * Block location information included in CS updates and used by Impalads * Column and table stats included in CS updates and used by Impalads * Query tests are all passing Still TODO: * Directly return catalog object metadata from DDL requests * Poll the Hive Metastore to detect new/dropped/modified tables * Reorganize the FE code for the Catalog Service. I don't think we want everything in the same JAR. Change-Id: I8c61296dac28fb98bcfdc17361f4f141d3977eda Reviewed-on: http://gerrit.ent.cloudera.com:8080/601 Reviewed-by: Lenni Kuff <lskuff@cloudera.com> Tested-by: Lenni Kuff <lskuff@cloudera.com>
181 lines
3.7 KiB
Plaintext
181 lines
3.7 KiB
Plaintext
====
|
|
---- QUERY
|
|
create database insert_permutation_test
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
use insert_permutation_test
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
create table perm_nopart(int_col1 int, string_col string, int_col2 int);
|
|
create table perm_part(int_col1 int, string_col string) partitioned by (p1 int, p2 string);
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Simple non-permutation
|
|
insert into perm_nopart(int_col1, string_col, int_col2) values(1,'str',2)
|
|
---- SETUP
|
|
RESET insert_permutation_test.perm_nopart
|
|
---- RESULTS
|
|
: 1
|
|
====
|
|
---- QUERY
|
|
select * from perm_nopart
|
|
---- RESULTS
|
|
1,'str',2
|
|
---- TYPES
|
|
INT,STRING,INT
|
|
====
|
|
---- QUERY
|
|
# Permute the int columns
|
|
insert into perm_nopart(int_col2, string_col, int_col1) values(1,'str',2)
|
|
---- SETUP
|
|
RESET insert_permutation_test.perm_nopart
|
|
---- RESULTS
|
|
: 1
|
|
====
|
|
---- QUERY
|
|
select * from perm_nopart
|
|
---- RESULTS
|
|
2,'str',1
|
|
---- TYPES
|
|
INT,STRING,INT
|
|
====
|
|
---- QUERY
|
|
# Leave out two columns, check they are assigned NULL
|
|
insert into perm_nopart(int_col2) values(1)
|
|
---- SETUP
|
|
RESET insert_permutation_test.perm_nopart
|
|
---- RESULTS
|
|
: 1
|
|
====
|
|
---- QUERY
|
|
select * from perm_nopart
|
|
---- RESULTS
|
|
NULL,'NULL',1
|
|
---- TYPES
|
|
INT,STRING,INT
|
|
====
|
|
---- QUERY
|
|
# Permute the partition columns
|
|
insert into perm_part(p1, string_col, int_col1, p2) values(10,'str',1, 'hello')
|
|
---- SETUP
|
|
RESET insert_permutation_test.perm_part
|
|
---- RESULTS
|
|
p1=10/p2=hello/: 1
|
|
====
|
|
---- QUERY
|
|
select * from perm_part
|
|
---- RESULTS
|
|
1,'str',10,'hello'
|
|
---- TYPES
|
|
INT,STRING,INT,STRING
|
|
====
|
|
---- QUERY
|
|
# Same thing - permute the partition columns, but invert their order relative to Hive
|
|
insert into perm_part(p2, string_col, int_col1, p1) values('hello','str',1, 10)
|
|
---- SETUP
|
|
RESET insert_permutation_test.perm_part
|
|
---- RESULTS
|
|
p1=10/p2=hello/: 1
|
|
====
|
|
---- QUERY
|
|
select * from perm_part
|
|
---- RESULTS
|
|
1,'str',10,'hello'
|
|
---- TYPES
|
|
INT,STRING,INT,STRING
|
|
====
|
|
---- QUERY
|
|
# Check NULL if only partition keys are mentioned
|
|
insert into perm_part(p2, p1) values('hello', 10)
|
|
---- SETUP
|
|
RESET insert_permutation_test.perm_part
|
|
---- RESULTS
|
|
p1=10/p2=hello/: 1
|
|
====
|
|
---- QUERY
|
|
select * from perm_part
|
|
---- RESULTS
|
|
NULL,'NULL',10,'hello'
|
|
---- TYPES
|
|
INT,STRING,INT,STRING
|
|
====
|
|
---- QUERY
|
|
# Check NULL if only partition keys are mentioned, one static
|
|
insert into perm_part(p2) PARTITION(p1=10) values('hello')
|
|
---- SETUP
|
|
RESET insert_permutation_test.perm_part
|
|
---- RESULTS
|
|
p1=10/p2=hello/: 1
|
|
====
|
|
---- QUERY
|
|
select * from perm_part
|
|
---- RESULTS
|
|
NULL,'NULL',10,'hello'
|
|
---- TYPES
|
|
INT,STRING,INT,STRING
|
|
====
|
|
---- QUERY
|
|
# Check dynamic keys mentioned in the PARTITION clause are still looked for at the end of
|
|
# the select-list
|
|
insert into perm_part(int_col1, string_col) PARTITION(p1=10, p2) values(1,'perm_col','part_col')
|
|
---- SETUP
|
|
RESET insert_permutation_test.perm_part
|
|
---- RESULTS
|
|
p1=10/p2=part_col/: 1
|
|
====
|
|
---- QUERY
|
|
select * from perm_part
|
|
---- RESULTS
|
|
1,'perm_col',10,'part_col'
|
|
---- TYPES
|
|
INT,STRING,INT,STRING
|
|
====
|
|
---- QUERY
|
|
# Check behaviour of empty permutation clause with no query statement
|
|
insert into perm_part() PARTITION(p1=10, p2='foo')
|
|
---- SETUP
|
|
RESET insert_permutation_test.perm_part
|
|
---- RESULTS
|
|
p1=10/p2=foo/: 1
|
|
====
|
|
---- QUERY
|
|
select * from perm_part
|
|
---- RESULTS
|
|
NULL,'NULL',10,'foo'
|
|
---- TYPES
|
|
INT,STRING,INT,STRING
|
|
====
|
|
---- QUERY
|
|
# Check behaviour of empty permutation clause
|
|
insert into perm_part() PARTITION(p1, p2='foo') values(5)
|
|
---- SETUP
|
|
RESET insert_permutation_test.perm_part
|
|
---- RESULTS
|
|
p1=5/p2=foo/: 1
|
|
====
|
|
---- QUERY
|
|
select * from perm_part
|
|
---- RESULTS
|
|
NULL,'NULL',5,'foo'
|
|
---- TYPES
|
|
INT,STRING,INT,STRING
|
|
====
|
|
---- QUERY
|
|
# Check behaviour of empty permutation clause with unpartitioned table
|
|
insert into perm_nopart()
|
|
---- SETUP
|
|
RESET insert_permutation_test.perm_nopart
|
|
---- RESULTS
|
|
: 1
|
|
====
|
|
---- QUERY
|
|
select * from perm_nopart
|
|
---- RESULTS
|
|
NULL,'NULL',NULL
|
|
---- TYPES
|
|
INT,STRING,INT
|
|
====
|